From cbf4c7a3ab01d1cab9ff0f7ae0b6df0de3edfb30 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Fri, 29 Nov 2019 16:28:38 +0100
Subject: [PATCH 001/179] Added Segments.

---
 src/TNL/Containers/Segments.h       |  29 ++++++++
 src/TNL/Containers/Segments/CSR.h   |  83 +++++++++++++++++++++
 src/TNL/Containers/Segments/CSR.hpp | 110 ++++++++++++++++++++++++++++
 3 files changed, 222 insertions(+)
 create mode 100644 src/TNL/Containers/Segments.h
 create mode 100644 src/TNL/Containers/Segments/CSR.h
 create mode 100644 src/TNL/Containers/Segments/CSR.hpp

diff --git a/src/TNL/Containers/Segments.h b/src/TNL/Containers/Segments.h
new file mode 100644
index 000000000..99ea22357
--- /dev/null
+++ b/src/TNL/Containers/Segments.h
@@ -0,0 +1,29 @@
+/***************************************************************************
+                          Segments.h -  description
+                             -------------------
+    begin                : Nov 29, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+namespace Containers {
+
+template< typename Value,
+          typename Organization >
+class Segments
+{
+   public:
+
+      using ValueType = Value;
+      using OrganizationType = Organization;
+      using IndexType = typename Organization::IndexType;
+
+};
+
+}  // namespace Conatiners
+} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h
new file mode 100644
index 000000000..3aa53e76c
--- /dev/null
+++ b/src/TNL/Containers/Segments/CSR.h
@@ -0,0 +1,83 @@
+/***************************************************************************
+                          CSR.h -  description
+                             -------------------
+    begin                : Nov 29, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+
+template< typename Device,
+          typename Index >
+class Segments
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
+
+      CSR();
+
+      CSR( const SizesHolder& sizes );
+
+      CSR( const CSR& csr );
+
+      CSR( const CSR&& csr );
+
+      /**
+       * \brief Set number of segments
+       */
+      //void setSegmentsCount();
+
+      /**
+       * \brief Set sizes of particular segmenets.
+       */
+      template< typename SizesHolder = OffsetsHolder >
+      void setSizes( const SizesHolder& sizes )
+
+      /**
+       * \brief Number segments.
+       */
+      Index getSize() const;
+
+      Index getStorageSize() const;
+
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'
+       */
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args args ) const;
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename... Args >
+      void segmentsReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, Args args );
+
+   protected:
+
+      OffsetsHolder offsets;
+
+};
+
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
+
+#include <TNL/Containers/Segments/CSR.h>
\ No newline at end of file
diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
new file mode 100644
index 000000000..ea45b40ba
--- /dev/null
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -0,0 +1,110 @@
+/***************************************************************************
+                          CSR.hpp -  description
+                             -------------------
+    begin                : Nov 29, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/ParalleFor.h>
+#include <TNL/Containers/Segments/CSR.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+
+template< typename Device,
+          typename Index >
+CSR< Device, Index >::
+CSR()
+{
+}
+
+template< typename Device,
+          typename Index >
+CSR< Device, Index >::
+CSR( const CSR& csr ) : offsets( csr.offsets )
+{
+}
+
+template< typename Device,
+          typename Index >
+CSR< Device, Index >::
+CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) )
+{
+
+}
+
+template< typename Device,
+          typename Index >
+CSR< Device, Index >::
+void setSegmentsCount( const IndexType& size )
+{
+   this->offsets.setSize( size + 1 );
+}
+
+template< typename Device,
+          typename Index >
+   template< typename SizesHolder = OffsetsHolder >
+CSR< Device, Index >::
+void setSizes( const SizesHolder& sizes )
+{
+   this->offsets.setSize( sizes.getSize() + 1 );
+   auto view = this->offsets.getView( 0, sizes.getSize() );
+   view = sizes;
+   this->offsets.setElement( sizes.getSize>(), 0 );
+   this->offsets.scan< Algorithms::ScanType::Exclusive >();
+}
+
+template< typename Device,
+          typename Index >
+CSR< Device, Index >::
+Index getSize() const
+{
+   return this->offsets.getSize() - 1;
+}
+
+template< typename Device,
+          typename Index >
+   template< typename Function, typename... Args >
+CSR< Device, Index >::
+void forAll( Function& f, Args args ) const
+{
+   const auto offsetsView = this->offsets.getView();
+   auto f = [=] __cuda_callable__ ( const IndexType i, f, args ) {
+      const IndexType begin = offsetsView[ i ];
+      const IndexType end = offsetsView[ i + 1 ];
+      for( IndexType j = begin; j < end; j++  )
+         f( i, j, args );
+   };
+   Algorithms::ParallelFor< Device >::exec( 0, this->getSize(), f );
+}
+
+template< typename Device,
+          typename Index >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+CSR< Device, Index >::
+void segmentsReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, Real zero, Args args )
+{
+   const auto offsetsView = this->offsets.getView();
+   auto f = [=] __cuda_callable__ ( const IndexType i, f, args ) {
+      const IndexType begin = offsetsView[ i ];
+      const IndexType end = offsetsView[ i + 1 ];
+      Real aux( zero );
+      for( IndexType j = begin; j < end; j++  )
+         reduction( aux, fetch( i, j, args ) );
+      keeper( i, aux );
+   };
+   Algorithms::ParallelFor< Device >::exec( 0, this->getSize(), f );
+}
+
+
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
\ No newline at end of file
-- 
GitLab


From 4fb3337049762632a0d0ccf35e0483f0c4413fa4 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Fri, 29 Nov 2019 16:29:11 +0100
Subject: [PATCH 002/179] Added SparseMatrix to be implemented using the
 Segments.

---
 src/TNL/Matrices/SparseMatrix.h   | 163 ++++++++++++++++++++++
 src/TNL/Matrices/SparseMatrix.hpp | 221 ++++++++++++++++++++++++++++++
 2 files changed, 384 insertions(+)
 create mode 100644 src/TNL/Matrices/SparseMatrix.h
 create mode 100644 src/TNL/Matrices/SparseMatrix.hpp

diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
new file mode 100644
index 000000000..acca39bf4
--- /dev/null
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -0,0 +1,163 @@
+/***************************************************************************
+                          SparseMatrix.h -  description
+                             -------------------
+    begin                : Nov 29, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real,
+          typename Organization >
+class SparseMatrix : public Matrix< Real, typename Organization::Device, typename Organization::Index >
+{
+   public:
+
+      using RealType = Real;
+      using OrganizationType = Organization;
+      using DeviceType = typename Organization::DeviceType;
+      using IndexType = typename Organization::IndexType;
+
+      static String getSerializationType();
+
+      virtual String getSerializationTypeVirtual() const;
+
+      void setDimensions( const IndexType rows,
+                          const IndexType columns );
+
+      void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
+
+      IndexType getRowLength( const IndexType row ) const;
+
+      __cuda_callable__
+      IndexType getRowLengthFast( const IndexType row ) const;
+
+      IndexType getNonZeroRowLength( const IndexType row ) const;
+
+      __cuda_callable__
+      IndexType getNonZeroRowLengthFast( const IndexType row ) const;
+
+      template< typename Real2, typename Device2, typename Index2 >
+      void setLike( const CSR< Real2, Device2, Index2 >& matrix );
+
+      void reset();
+
+      __cuda_callable__
+      bool setElementFast( const IndexType row,
+                           const IndexType column,
+                           const RealType& value );
+
+      bool setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
+      __cuda_callable__
+      bool addElementFast( const IndexType row,
+                           const IndexType column,
+                           const RealType& value,
+                           const RealType& thisElementMultiplicator = 1.0 );
+
+      bool addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator = 1.0 );
+
+
+      __cuda_callable__
+      bool setRowFast( const IndexType row,
+                       const IndexType* columnIndexes,
+                       const RealType* values,
+                       const IndexType elements );
+
+      bool setRow( const IndexType row,
+                   const IndexType* columnIndexes,
+                   const RealType* values,
+                   const IndexType elements );
+
+
+      __cuda_callable__
+      bool addRowFast( const IndexType row,
+                       const IndexType* columns,
+                       const RealType* values,
+                       const IndexType numberOfElements,
+                       const RealType& thisElementMultiplicator = 1.0 );
+
+      bool addRow( const IndexType row,
+                   const IndexType* columns,
+                   const RealType* values,
+                   const IndexType numberOfElements,
+                   const RealType& thisElementMultiplicator = 1.0 );
+
+
+      __cuda_callable__
+      RealType getElementFast( const IndexType row,
+                               const IndexType column ) const;
+
+      RealType getElement( const IndexType row,
+                           const IndexType column ) const;
+
+      __cuda_callable__
+      void getRowFast( const IndexType row,
+                       IndexType* columns,
+                       RealType* values ) const;
+
+      __cuda_callable__
+      MatrixRow getRow( const IndexType rowIndex );
+
+      __cuda_callable__
+      ConstMatrixRow getRow( const IndexType rowIndex ) const;
+
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
+
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector ) const;
+      // TODO: add const RealType& multiplicator = 1.0 )
+
+      template< typename Real2, typename Index2 >
+      void addMatrix( const CSR< Real2, Device, Index2 >& matrix,
+                      const RealType& matrixMultiplicator = 1.0,
+                      const RealType& thisMatrixMultiplicator = 1.0 );
+
+      template< typename Real2, typename Index2 >
+      void getTransposition( const CSR< Real2, Device, Index2 >& matrix,
+                             const RealType& matrixMultiplicator = 1.0 );
+
+      template< typename Vector1, typename Vector2 >
+      bool performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
+
+      // copy assignment
+      CSR& operator=( const CSR& matrix );
+
+      // cross-device copy assignment
+      template< typename Real2, typename Device2, typename Index2,
+                typename = typename Enabler< Device2 >::type >
+      CSR& operator=( const CSR< Real2, Device2, Index2 >& matrix );
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+      void save( const String& fileName ) const;
+
+      void load( const String& fileName );
+
+      void print( std::ostream& str ) const;
+
+
+};
+
+}  // namespace Conatiners
+} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
new file mode 100644
index 000000000..2d11bb21e
--- /dev/null
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -0,0 +1,221 @@
+/***************************************************************************
+                          SparseMatrix.h -  description
+                             -------------------
+    begin                : Nov 29, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real,
+          typename Organization >
+static String getSerializationType();
+
+template< typename Real,
+          typename Organization >
+String getSerializationTypeVirtual() const;
+
+template< typename Real,
+          typename Organization >
+void setDimensions( const IndexType rows,
+                 const IndexType columns );
+
+template< typename Real,
+          typename Organization >
+void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
+
+template< typename Real,
+          typename Organization >
+IndexType getRowLength( const IndexType row ) const;
+
+template< typename Real,
+          typename Organization >
+__cuda_callable__
+IndexType getRowLengthFast( const IndexType row ) const;
+
+template< typename Real,
+          typename Organization >
+IndexType getNonZeroRowLength( const IndexType row ) const;
+
+template< typename Real,
+          typename Organization >
+__cuda_callable__
+IndexType getNonZeroRowLengthFast( const IndexType row ) const;
+
+template< typename Real,
+          typename Organization >
+template< typename Real2, typename Device2, typename Index2 >
+void setLike( const CSR< Real2, Device2, Index2 >& matrix );
+
+template< typename Real,
+          typename Organization >
+void reset();
+
+template< typename Real,
+          typename Organization >
+__cuda_callable__
+bool setElementFast( const IndexType row,
+                  const IndexType column,
+                  const RealType& value );
+
+template< typename Real,
+          typename Organization >
+bool setElement( const IndexType row,
+              const IndexType column,
+              const RealType& value );
+
+template< typename Real,
+          typename Organization >
+__cuda_callable__
+bool addElementFast( const IndexType row,
+                  const IndexType column,
+                  const RealType& value,
+                  const RealType& thisElementMultiplicator = 1.0 );
+
+template< typename Real,
+          typename Organization >
+bool addElement( const IndexType row,
+              const IndexType column,
+              const RealType& value,
+              const RealType& thisElementMultiplicator = 1.0 );
+
+
+template< typename Real,
+          typename Organization >
+__cuda_callable__
+bool setRowFast( const IndexType row,
+              const IndexType* columnIndexes,
+              const RealType* values,
+              const IndexType elements );
+
+template< typename Real,
+          typename Organization >
+bool setRow( const IndexType row,
+          const IndexType* columnIndexes,
+          const RealType* values,
+          const IndexType elements );
+
+
+template< typename Real,
+          typename Organization >
+__cuda_callable__
+bool addRowFast( const IndexType row,
+              const IndexType* columns,
+              const RealType* values,
+              const IndexType numberOfElements,
+              const RealType& thisElementMultiplicator = 1.0 );
+
+template< typename Real,
+          typename Organization >
+bool addRow( const IndexType row,
+          const IndexType* columns,
+          const RealType* values,
+          const IndexType numberOfElements,
+          const RealType& thisElementMultiplicator = 1.0 );
+
+
+template< typename Real,
+          typename Organization >
+__cuda_callable__
+RealType getElementFast( const IndexType row,
+                      const IndexType column ) const;
+
+template< typename Real,
+          typename Organization >
+RealType getElement( const IndexType row,
+                  const IndexType column ) const;
+
+__cuda_callable__
+template< typename Real,
+          typename Organization >
+void getRowFast( const IndexType row,
+              IndexType* columns,
+              RealType* values ) const;
+
+template< typename Real,
+          typename Organization >
+__cuda_callable__
+MatrixRow getRow( const IndexType rowIndex );
+
+template< typename Real,
+          typename Organization >
+__cuda_callable__
+ConstMatrixRow getRow( const IndexType rowIndex ) const;
+
+template< typename Real,
+          typename Organization >
+template< typename Vector >
+__cuda_callable__
+typename Vector::RealType rowVectorProduct( const IndexType row,
+                                         const Vector& vector ) const;
+
+template< typename Real,
+          typename Organization >
+template< typename InVector,
+       typename OutVector >
+void vectorProduct( const InVector& inVector,
+                 OutVector& outVector ) const;
+// TODO: add const RealType& multiplicator = 1.0 )
+
+template< typename Real,
+          typename Organization >
+template< typename Real2, typename Index2 >
+void addMatrix( const CSR< Real2, Device, Index2 >& matrix,
+             const RealType& matrixMultiplicator = 1.0,
+             const RealType& thisMatrixMultiplicator = 1.0 );
+
+template< typename Real,
+          typename Organization >
+template< typename Real2, typename Index2 >
+void getTransposition( const CSR< Real2, Device, Index2 >& matrix,
+                    const RealType& matrixMultiplicator = 1.0 );
+
+template< typename Real,
+          typename Organization >
+template< typename Vector1, typename Vector2 >
+bool performSORIteration( const Vector1& b,
+                       const IndexType row,
+                       Vector2& x,
+                       const RealType& omega = 1.0 ) const;
+
+// copy assignment
+template< typename Real,
+          typename Organization >
+CSR& operator=( const CSR& matrix );
+
+// cross-device copy assignment
+template< typename Real,
+          typename Organization >
+template< typename Real2, typename Device2, typename Index2,
+       typename = typename Enabler< Device2 >::type >
+CSR& operator=( const CSR< Real2, Device2, Index2 >& matrix );
+
+template< typename Real,
+          typename Organization >
+void save( File& file ) const;
+
+template< typename Real,
+          typename Organization >
+void load( File& file );
+
+template< typename Real,
+          typename Organization >
+void save( const String& fileName ) const;
+
+template< typename Real,
+          typename Organization >
+void load( const String& fileName );
+
+template< typename Real,
+          typename Organization >
+void print( std::ostream& str ) const;
+
+
+   } //namespace Matrices
+} // namespace  TNL
-- 
GitLab


From d849e7ac783b900ca6623df183a2bdefde3bacea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 2 Dec 2019 17:57:39 +0100
Subject: [PATCH 003/179] Added allocator to Matrix and moved
 getNumberOfMatrixElements from Sparse to Matrix.

---
 src/TNL/Matrices/Matrix.h      |  23 +++---
 src/TNL/Matrices/Matrix_impl.h | 127 ++++++++++++++++++++++-----------
 src/TNL/Matrices/Sparse.h      |   2 -
 src/TNL/Matrices/Sparse_impl.h |   7 --
 4 files changed, 100 insertions(+), 59 deletions(-)

diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index eb29f62c7..a877fd5c2 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -11,6 +11,7 @@
 #pragma once
 
 #include <TNL/Object.h>
+#include <TNL/Allocators/Default.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/VectorView.h>
@@ -23,22 +24,28 @@ namespace Matrices {
 
 template< typename Real = double,
           typename Device = Devices::Host,
-          typename Index = int >
+          typename Index = int,
+          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
 class Matrix : public Object
 {
 public:
-   typedef Real RealType;
+   using RealType = Real;
    typedef Device DeviceType;
    typedef Index IndexType;
    typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
    typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
    typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
-   typedef Containers::Vector< RealType, DeviceType, IndexType > ValuesVector;
+   typedef Containers::Vector< RealType, DeviceType, IndexType, RealAllocator > ValuesVector;
+   using RealAllocatorType = RealAllocator;
 
-   Matrix();
+   Matrix( const RealAllocatorType& allocator = RealAllocatorType() );
+   
+   Matrix( const IndexType rows,
+           const IndexType columns,
+           const RealAllocatorType& allocator = RealAllocatorType() );
 
    virtual void setDimensions( const IndexType rows,
-                                 const IndexType columns );
+                               const IndexType columns );
 
    virtual void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) = 0;
 
@@ -50,10 +57,10 @@ public:
 
    virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
 
-   template< typename Real2, typename Device2, typename Index2 >
-   void setLike( const Matrix< Real2, Device2, Index2 >& matrix );
+   template< typename Real2, typename Device2, typename Index2, typename RealAllocator2 >
+   void setLike( const Matrix< Real2, Device2, Index2, RealAllocator2 >& matrix );
 
-   virtual IndexType getNumberOfMatrixElements() const = 0;
+   IndexType getNumberOfMatrixElements() const;
 
    virtual IndexType getNumberOfNonzeroMatrixElements() const = 0;
 
diff --git a/src/TNL/Matrices/Matrix_impl.h b/src/TNL/Matrices/Matrix_impl.h
index 3371ee4ec..599e5ad33 100644
--- a/src/TNL/Matrices/Matrix_impl.h
+++ b/src/TNL/Matrices/Matrix_impl.h
@@ -21,17 +21,33 @@ namespace Matrices {
 
 template< typename Real,
           typename Device,
-          typename Index >
-Matrix< Real, Device, Index >::Matrix()
+          typename Index,
+          typename RealAllocator >
+Matrix< Real, Device, Index, RealAllocator >::
+Matrix( const RealAllocatorType& allocator )
 : rows( 0 ),
-  columns( 0 )
+  columns( 0 ),
+   values( allocator )
 {
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Matrix< Real, Device, Index >::setDimensions( const IndexType rows,
+          typename Index,
+          typename RealAllocator >
+Matrix< Real, Device, Index, RealAllocator >::
+Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType& allocator )
+: rows( rows_ ),
+  columns( columns_ ),
+  values( allocator )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+void Matrix< Real, Device, Index, RealAllocator >::setDimensions( const IndexType rows,
                                                    const IndexType columns )
 {
    TNL_ASSERT( rows > 0 && columns > 0,
@@ -42,8 +58,9 @@ void Matrix< Real, Device, Index >::setDimensions( const IndexType rows,
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Matrix< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const
+          typename Index,
+          typename RealAllocator >
+void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const
 {
    rowLengths.setSize( this->getRows() );
    getCompressedRowLengths( rowLengths.getView() );
@@ -51,8 +68,9 @@ void Matrix< Real, Device, Index >::getCompressedRowLengths( CompressedRowLength
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Matrix< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
+          typename Index,
+          typename RealAllocator >
+void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
 {
    TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
    for( IndexType row = 0; row < this->getRows(); row++ )
@@ -61,19 +79,31 @@ void Matrix< Real, Device, Index >::getCompressedRowLengths( CompressedRowLength
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          typename RealAllocator >
    template< typename Real2,
              typename Device2,
-             typename Index2 >
-void Matrix< Real, Device, Index >::setLike( const Matrix< Real2, Device2, Index2 >& matrix )
+             typename Index2,
+             typename RealAllocator2 >
+void Matrix< Real, Device, Index, RealAllocator >::setLike( const Matrix< Real2, Device2, Index2, RealAllocator2 >& matrix )
 {
    setDimensions( matrix.getRows(), matrix.getColumns() );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Matrix< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const
+          typename Index,
+          typename RealAllocator >
+Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfMatrixElements() const
+{
+   return this->values.getSize();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfNonzeroMatrixElements() const
 {
     IndexType nonZeroElements( 0 );
     for( IndexType i = 0; this->values.getSize(); i++ )
@@ -85,27 +115,30 @@ Index Matrix< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          typename RealAllocator >
 __cuda_callable__
-Index Matrix< Real, Device, Index >::getRows() const
+Index Matrix< Real, Device, Index, RealAllocator >::getRows() const
 {
    return this->rows;
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          typename RealAllocator >
 __cuda_callable__
-Index Matrix< Real, Device, Index >::getColumns() const
+Index Matrix< Real, Device, Index, RealAllocator >::getColumns() const
 {
    return this->columns;
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-const typename Matrix< Real, Device, Index >::ValuesVector&
-Matrix< Real, Device, Index >::
+          typename Index,
+          typename RealAllocator >
+const typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector&
+Matrix< Real, Device, Index, RealAllocator >::
 getValues() const
 {
    return this->values;
@@ -113,9 +146,10 @@ getValues() const
    
 template< typename Real,
           typename Device,
-          typename Index >
-typename Matrix< Real, Device, Index >::ValuesVector& 
-Matrix< Real, Device, Index >::
+          typename Index,
+          typename RealAllocator >
+typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector& 
+Matrix< Real, Device, Index, RealAllocator >::
 getValues()
 {
    return this->values;
@@ -123,8 +157,9 @@ getValues()
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Matrix< Real, Device, Index >::reset()
+          typename Index,
+          typename RealAllocator >
+void Matrix< Real, Device, Index, RealAllocator >::reset()
 {
    this->rows = 0;
    this->columns = 0;
@@ -132,9 +167,10 @@ void Matrix< Real, Device, Index >::reset()
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          typename RealAllocator >
    template< typename MatrixT >
-bool Matrix< Real, Device, Index >::operator == ( const MatrixT& matrix ) const
+bool Matrix< Real, Device, Index, RealAllocator >::operator == ( const MatrixT& matrix ) const
 {
    if( this->getRows() != matrix.getRows() ||
        this->getColumns() != matrix.getColumns() )
@@ -148,17 +184,19 @@ bool Matrix< Real, Device, Index >::operator == ( const MatrixT& matrix ) const
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          typename RealAllocator >
    template< typename MatrixT >
-bool Matrix< Real, Device, Index >::operator != ( const MatrixT& matrix ) const
+bool Matrix< Real, Device, Index, RealAllocator >::operator != ( const MatrixT& matrix ) const
 {
    return ! operator == ( matrix );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Matrix< Real, Device, Index >::save( File& file ) const
+          typename Index,
+          typename RealAllocator >
+void Matrix< Real, Device, Index, RealAllocator >::save( File& file ) const
 {
    Object::save( file );
    file.save( &this->rows );
@@ -168,8 +206,9 @@ void Matrix< Real, Device, Index >::save( File& file ) const
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Matrix< Real, Device, Index >::load( File& file )
+          typename Index,
+          typename RealAllocator >
+void Matrix< Real, Device, Index, RealAllocator >::load( File& file )
 {
    Object::load( file );
    file.load( &this->rows );
@@ -179,17 +218,19 @@ void Matrix< Real, Device, Index >::load( File& file )
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Matrix< Real, Device, Index >::print( std::ostream& str ) const
+          typename Index,
+          typename RealAllocator >
+void Matrix< Real, Device, Index, RealAllocator >::print( std::ostream& str ) const
 {
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          typename RealAllocator >
 __cuda_callable__
 const Index&
-Matrix< Real, Device, Index >::
+Matrix< Real, Device, Index, RealAllocator >::
 getNumberOfColors() const
 {
    return this->numberOfColors;
@@ -197,9 +238,10 @@ getNumberOfColors() const
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          typename RealAllocator >
 void 
-Matrix< Real, Device, Index >::
+Matrix< Real, Device, Index, RealAllocator >::
 computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector)
 {
     for( IndexType i = this->getRows() - 1; i >= 0; i-- )
@@ -234,9 +276,10 @@ computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector)
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          typename RealAllocator >
 void
-Matrix< Real, Device, Index >::
+Matrix< Real, Device, Index, RealAllocator >::
 copyFromHostToCuda( Matrix< Real, Devices::Host, Index >& matrix )
 {
     this->numberOfColors = matrix.getNumberOfColors();
diff --git a/src/TNL/Matrices/Sparse.h b/src/TNL/Matrices/Sparse.h
index 7dc3798d2..c19002443 100644
--- a/src/TNL/Matrices/Sparse.h
+++ b/src/TNL/Matrices/Sparse.h
@@ -37,8 +37,6 @@ class Sparse : public Matrix< Real, Device, Index >
    template< typename Real2, typename Device2, typename Index2 >
    void setLike( const Sparse< Real2, Device2, Index2 >& matrix );
 
-   IndexType getNumberOfMatrixElements() const;
-
    IndexType getNumberOfNonzeroMatrixElements() const;
 
    IndexType getMaxRowLength() const;
diff --git a/src/TNL/Matrices/Sparse_impl.h b/src/TNL/Matrices/Sparse_impl.h
index d1643db19..dda95e68b 100644
--- a/src/TNL/Matrices/Sparse_impl.h
+++ b/src/TNL/Matrices/Sparse_impl.h
@@ -36,13 +36,6 @@ void Sparse< Real, Device, Index >::setLike( const Sparse< Real2, Device2, Index
    this->allocateMatrixElements( matrix.getNumberOfMatrixElements() );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index >
-Index Sparse< Real, Device, Index >::getNumberOfMatrixElements() const
-{
-   return this->values.getSize();
-}
 
 template< typename Real,
           typename Device,
-- 
GitLab


From 6f645c5a2afae4711f71a243cf9dc251d0be66bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 2 Dec 2019 17:58:48 +0100
Subject: [PATCH 004/179] Added unit tests for CSR matrix using Segments.

---
 src/UnitTests/Matrices/CMakeLists.txt         |  16 ++
 .../SparseMatrixTest_CSR_segments.cpp         |   1 +
 .../Matrices/SparseMatrixTest_CSR_segments.cu |   1 +
 .../Matrices/SparseMatrixTest_CSR_segments.h  | 141 ++++++++++++++++++
 4 files changed, 159 insertions(+)
 create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cpp
 create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cu
 create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h

diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index 2a08be219..f278934a6 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -25,6 +25,12 @@ IF( BUILD_CUDA )
 
    CUDA_ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} )
+
+   ####
+   # Segments tests
+   CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} )
+
 ELSE(  BUILD_CUDA )
    ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp )
    TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
@@ -61,6 +67,13 @@ ELSE(  BUILD_CUDA )
    ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp )
    TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} )
+
+   ####
+   # Segments tests
+   ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cpp )
+   TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR_segments PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} )
+
 ENDIF( BUILD_CUDA )
 
 
@@ -76,6 +89,9 @@ ADD_TEST( SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixT
 # TODO: DenseMatrixTest is not finished
 #ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 
+####
+# Segments tests
+ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} )
 
 if( ${BUILD_MPI} )
    if( BUILD_CUDA )
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cpp b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cpp
new file mode 100644
index 000000000..771c74b9a
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cpp
@@ -0,0 +1 @@
+#include "SparseMatrixTest_CSR_segments.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cu b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cu
new file mode 100644
index 000000000..771c74b9a
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cu
@@ -0,0 +1 @@
+#include "SparseMatrixTest_CSR_segments.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
new file mode 100644
index 000000000..00654de3c
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
@@ -0,0 +1,141 @@
+/***************************************************************************
+                          SparseMatrixTest_CSR.h -  description
+                             -------------------
+    begin                : Nov 2, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Matrices/SparseMatrix.h>
+
+
+#include "SparseMatrixTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+// test fixture for typed tests
+template< typename Matrix >
+class CSRMatrixTest : public ::testing::Test
+{
+protected:
+   using CSRMatrixType = Matrix;
+};
+
+// types for which MatrixTest is instantiated
+using CSRMatrixTypes = ::testing::Types
+<
+    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::CSR, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::CSR, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::CSR, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::CSR, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::CSR, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::CSR, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::CSR, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::CSR, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::CSR, TNL::Devices::Host, long  >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::CSR, TNL::Devices::Host, long  >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::CSR, TNL::Devices::Host, long  >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::CSR, TNL::Devices::Host, long  >
+#ifdef HAVE_CUDA                                                                             
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::CSR, TNL::Devices::Cuda, long  >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::CSR, TNL::Devices::Cuda, long  >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::CSR, TNL::Devices::Cuda, long  >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::CSR, TNL::Devices::Cuda, long  >
+#endif
+>;
+
+TYPED_TEST_SUITE( CSRMatrixTest, CSRMatrixTypes);
+
+TYPED_TEST( CSRMatrixTest, setDimensionsTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SetDimensions< CSRMatrixType >();
+}
+
+//TYPED_TEST( CSRMatrixTest, setCompressedRowLengthsTest )
+//{
+////    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+//
+////    test_SetCompressedRowLengths< CSRMatrixType >();
+//
+//    bool testRan = false;
+//    EXPECT_TRUE( testRan );
+//    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+//    std::cout << "      This test is dependent on the input format. \n";
+//    std::cout << "      Almost every format allocates elements per row differently.\n\n";
+//    std::cout << "\n    TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n";
+//}
+
+TYPED_TEST( CSRMatrixTest, setLikeTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SetLike< CSRMatrixType, CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, resetTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_Reset< CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, setElementTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SetElement< CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, addElementTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_AddElement< CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, setRowTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SetRow< CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, vectorProductTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_VectorProduct< CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, saveAndLoadTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SaveAndLoad< CSRMatrixType >( "test_SparseMatrixTest_CSR" );
+}
+
+TYPED_TEST( CSRMatrixTest, printTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_Print< CSRMatrixType >();
+}
+
+#endif
+
+#include "../main.h"
-- 
GitLab


From 3b986213231d04c1f16a2955b37545266a8073a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 2 Dec 2019 17:59:26 +0100
Subject: [PATCH 005/179] Implementing SparseMatrix using Segments.

---
 src/TNL/Containers/Segments/CSR.h |  25 +-
 src/TNL/Matrices/SparseMatrix.h   |  74 ++-
 src/TNL/Matrices/SparseMatrix.hpp | 744 +++++++++++++++++++++++-------
 3 files changed, 632 insertions(+), 211 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h
index 3aa53e76c..f86def78e 100644
--- a/src/TNL/Containers/Segments/CSR.h
+++ b/src/TNL/Containers/Segments/CSR.h
@@ -19,7 +19,7 @@ namespace TNL {
 
 template< typename Device,
           typename Index >
-class Segments
+class CSR
 {
    public:
 
@@ -29,29 +29,24 @@ class Segments
 
       CSR();
 
-      CSR( const SizesHolder& sizes );
+      CSR( const Vector< IndexType, DeviceType, IndexType >& sizes );
 
-      CSR( const CSR& csr );
+      CSR( const CSR& segments );
 
-      CSR( const CSR&& csr );
-
-      /**
-       * \brief Set number of segments
-       */
-      //void setSegmentsCount();
+      CSR( const CSR&& segments );
 
       /**
        * \brief Set sizes of particular segmenets.
        */
       template< typename SizesHolder = OffsetsHolder >
-      void setSizes( const SizesHolder& sizes )
+      void setSizes( const SizesHolder& sizes );
 
       /**
        * \brief Number segments.
        */
-      Index getSize() const;
+      IndexType getSize() const;
 
-      Index getStorageSize() const;
+      IndexType getStorageSize() const;
 
       IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
 
@@ -62,13 +57,13 @@ class Segments
        * function 'f' with arguments 'args'
        */
       template< typename Function, typename... Args >
-      void forAll( Function& f, Args args ) const;
+      void forAll( Function& f, Args... args ) const;
 
       /***
        * \brief Go over all segments and perform a reduction in each of them.
        */
       template< typename Fetch, typename Reduction, typename ResultKeeper, typename... Args >
-      void segmentsReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, Args args );
+      void segmentsReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, Args... args );
 
    protected:
 
@@ -80,4 +75,4 @@ class Segments
    }  // namespace Conatiners
 } // namespace TNL
 
-#include <TNL/Containers/Segments/CSR.h>
\ No newline at end of file
+#include <TNL/Containers/Segments/CSR.h>
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index acca39bf4..e266dc66d 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -10,19 +10,46 @@
 
 #pragma once
 
+#include <TNL/Matrices/Matrix.h>
+#include <TNL/Allocators/Default.h>
+
 namespace TNL {
 namespace Matrices {
 
 template< typename Real,
-          typename Organization >
-class SparseMatrix : public Matrix< Real, typename Organization::Device, typename Organization::Index >
+          template< typename, typename > class Segments,
+          typename Device = Devices::Host,
+          typename Index = int,
+          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >,
+          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > >
+class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 {
    public:
 
       using RealType = Real;
-      using OrganizationType = Organization;
-      using DeviceType = typename Organization::DeviceType;
-      using IndexType = typename Organization::IndexType;
+      template< typename Device_, typename Index_ >
+      using SegmentsTemplate = Segments< Device_, Index_ >;
+      using SegmentsType = Segments< Device, Index >;
+      using DeviceType = Device;
+      using IndexType = Index;
+      using RealAllocatorType = RealAllocator;
+      using IndexAllocatorType = IndexAllocator;
+      using CompressedRowLengthsVectorView = Containers::VectorView< IndexType, DeviceType, IndexType >;
+      using ConstCompressedRowLengthsVectorView = typename CompressedRowLengthsVectorView::ConstViewType;
+      using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector;
+      using ColumnsVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
+
+      SparseMatrix( const RealAllocatorType& realAllocator = RealAllocatorType(),
+                    const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
+
+      SparseMatrix( const SparseMatrix& m );
+
+      SparseMatrix( const SparseMatrix&& m );
+
+      SparseMatrix( const IndexType rows,
+                    const IndexType columns,
+                    const RealAllocatorType& realAllocator = RealAllocatorType(),
+                    const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
 
       static String getSerializationType();
 
@@ -43,8 +70,10 @@ class SparseMatrix : public Matrix< Real, typename Organization::Device, typenam
       __cuda_callable__
       IndexType getNonZeroRowLengthFast( const IndexType row ) const;
 
-      template< typename Real2, typename Device2, typename Index2 >
-      void setLike( const CSR< Real2, Device2, Index2 >& matrix );
+      template< typename Real2, template< typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 >
+      void setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix );
+
+      IndexType getNumberOfNonzeroMatrixElements() const;
 
       void reset();
 
@@ -106,11 +135,11 @@ class SparseMatrix : public Matrix< Real, typename Organization::Device, typenam
                        IndexType* columns,
                        RealType* values ) const;
 
-      __cuda_callable__
+      /*__cuda_callable__
       MatrixRow getRow( const IndexType rowIndex );
 
       __cuda_callable__
-      ConstMatrixRow getRow( const IndexType rowIndex ) const;
+      ConstMatrixRow getRow( const IndexType rowIndex ) const;*/
 
       template< typename Vector >
       __cuda_callable__
@@ -123,14 +152,15 @@ class SparseMatrix : public Matrix< Real, typename Organization::Device, typenam
                           OutVector& outVector ) const;
       // TODO: add const RealType& multiplicator = 1.0 )
 
-      template< typename Real2, typename Index2 >
-      void addMatrix( const CSR< Real2, Device, Index2 >& matrix,
+      /*template< typename Real2, typename Index2 >
+      void addMatrix( const SparseMatrix< Real2, Segments, Device, Index2 >& matrix,
                       const RealType& matrixMultiplicator = 1.0,
                       const RealType& thisMatrixMultiplicator = 1.0 );
 
       template< typename Real2, typename Index2 >
-      void getTransposition( const CSR< Real2, Device, Index2 >& matrix,
+      void getTransposition( const SparseMatrix< Real2, Segments, Device, Index2 >& matrix,
                              const RealType& matrixMultiplicator = 1.0 );
+       */
 
       template< typename Vector1, typename Vector2 >
       bool performSORIteration( const Vector1& b,
@@ -139,12 +169,16 @@ class SparseMatrix : public Matrix< Real, typename Organization::Device, typenam
                                 const RealType& omega = 1.0 ) const;
 
       // copy assignment
-      CSR& operator=( const CSR& matrix );
+      SparseMatrix& operator=( const SparseMatrix& matrix );
 
       // cross-device copy assignment
-      template< typename Real2, typename Device2, typename Index2,
-                typename = typename Enabler< Device2 >::type >
-      CSR& operator=( const CSR< Real2, Device2, Index2 >& matrix );
+      template< typename Real2, 
+                template< typename, typename > class Segments2,
+                typename Device2,
+                typename Index2,
+                typename RealAllocator2,
+                typename IndexAllocator2 >
+      SparseMatrix& operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix );
 
       void save( File& file ) const;
 
@@ -155,9 +189,13 @@ class SparseMatrix : public Matrix< Real, typename Organization::Device, typenam
       void load( const String& fileName );
 
       void print( std::ostream& str ) const;
+      
+   protected:
 
-
+      ColumnsVectorType columnsVector;
 };
 
 }  // namespace Conatiners
-} // namespace TNL
\ No newline at end of file
+} // namespace TNL
+
+#include <TNL/Matrices/SparseMatrix.hpp>
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 2d11bb21e..abfc1619d 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -10,211 +10,599 @@
 
 #pragma once
 
+#include <TNL/Matrices/SparseMatrix.h>
+
 namespace TNL {
 namespace Matrices {
 
-template< typename Real,
-          typename Organization >
-static String getSerializationType();
-
-template< typename Real,
-          typename Organization >
-String getSerializationTypeVirtual() const;
-
-template< typename Real,
-          typename Organization >
-void setDimensions( const IndexType rows,
-                 const IndexType columns );
-
-template< typename Real,
-          typename Organization >
-void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
-
-template< typename Real,
-          typename Organization >
-IndexType getRowLength( const IndexType row ) const;
-
-template< typename Real,
-          typename Organization >
+   template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix( const RealAllocatorType& realAllocator,
+              const IndexAllocatorType& indexAllocator )
+   : Matrix< Real, Device, Index, RealAllocator >( realAllocator ), columnsVector( indexAllocator )
+{
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix( const SparseMatrix& m )
+   : Matrix< Real, Device, Index, RealAllocator >( m ), columnsVector( m.columnsVector )
+{
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix( const SparseMatrix&& m )
+   : Matrix< Real, Device, Index, RealAllocator >( std::move( m ) ), columnsVector( std::move( m.columnsVector ) )
+{
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix( const IndexType rows,
+              const IndexType columns,
+              const RealAllocatorType& realAllocator,
+              const IndexAllocatorType& indexAllocator )
+: Matrix< Real, Device, Index, RealAllocator >( rows, columns, realAllocator ), columnsVector( indexAllocator )
+{  
+}
+   
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+String
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+getSerializationType()
+{
+   return String( "Matrices::SparseMatrix< " ) +
+             TNL::getSerializationType< RealType >() + ", " +
+             TNL::getSerializationType< SegmentsType >() + ", [any_device], " +
+             TNL::getSerializationType< IndexType >() + ", [any_allocator] >";
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+String
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+getSerializationTypeVirtual() const
+{
+   return this->getSerializationType();
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+setDimensions( const IndexType rows,
+               const IndexType columns )
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+Index
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+getRowLength( const IndexType row ) const
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
 __cuda_callable__
-IndexType getRowLengthFast( const IndexType row ) const;
-
-template< typename Real,
-          typename Organization >
-IndexType getNonZeroRowLength( const IndexType row ) const;
-
-template< typename Real,
-          typename Organization >
+Index
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+getRowLengthFast( const IndexType row ) const
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+Index
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+getNonZeroRowLength( const IndexType row ) const
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
 __cuda_callable__
-IndexType getNonZeroRowLengthFast( const IndexType row ) const;
-
-template< typename Real,
-          typename Organization >
-template< typename Real2, typename Device2, typename Index2 >
-void setLike( const CSR< Real2, Device2, Index2 >& matrix );
-
-template< typename Real,
-          typename Organization >
-void reset();
-
-template< typename Real,
-          typename Organization >
+Index
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+getNonZeroRowLengthFast( const IndexType row ) const
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Real2, template< typename, typename > class Segments2,  typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 >
+void
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix )
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+Index
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+getNumberOfNonzeroMatrixElements() const
+{
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+reset()
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
 __cuda_callable__
-bool setElementFast( const IndexType row,
-                  const IndexType column,
-                  const RealType& value );
-
-template< typename Real,
-          typename Organization >
-bool setElement( const IndexType row,
-              const IndexType column,
-              const RealType& value );
-
-template< typename Real,
-          typename Organization >
+bool
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+setElementFast( const IndexType row,
+                const IndexType column,
+                const RealType& value )
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+bool
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+setElement( const IndexType row,
+            const IndexType column,
+            const RealType& value )
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
 __cuda_callable__
-bool addElementFast( const IndexType row,
-                  const IndexType column,
-                  const RealType& value,
-                  const RealType& thisElementMultiplicator = 1.0 );
-
-template< typename Real,
-          typename Organization >
-bool addElement( const IndexType row,
-              const IndexType column,
-              const RealType& value,
-              const RealType& thisElementMultiplicator = 1.0 );
-
-
-template< typename Real,
-          typename Organization >
+bool
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+addElementFast( const IndexType row,
+                const IndexType column,
+                const RealType& value,
+                const RealType& thisElementMultiplicator )
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+bool
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+addElement( const IndexType row,
+            const IndexType column,
+            const RealType& value,
+            const RealType& thisElementMultiplicator )
+{
+   
+}
+
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
 __cuda_callable__
-bool setRowFast( const IndexType row,
-              const IndexType* columnIndexes,
-              const RealType* values,
-              const IndexType elements );
-
-template< typename Real,
-          typename Organization >
-bool setRow( const IndexType row,
-          const IndexType* columnIndexes,
-          const RealType* values,
-          const IndexType elements );
-
-
-template< typename Real,
-          typename Organization >
+bool
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+setRowFast( const IndexType row,
+            const IndexType* columnIndexes,
+            const RealType* values,
+            const IndexType elements )
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+bool
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+setRow( const IndexType row,
+        const IndexType* columnIndexes,
+        const RealType* values,
+        const IndexType elements )
+{
+   
+}
+
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
 __cuda_callable__
-bool addRowFast( const IndexType row,
-              const IndexType* columns,
-              const RealType* values,
-              const IndexType numberOfElements,
-              const RealType& thisElementMultiplicator = 1.0 );
-
-template< typename Real,
-          typename Organization >
-bool addRow( const IndexType row,
-          const IndexType* columns,
-          const RealType* values,
-          const IndexType numberOfElements,
-          const RealType& thisElementMultiplicator = 1.0 );
-
-
-template< typename Real,
-          typename Organization >
+bool
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+addRowFast( const IndexType row,
+            const IndexType* columns,
+            const RealType* values,
+            const IndexType numberOfElements,
+            const RealType& thisElementMultiplicator )
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+bool
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+addRow( const IndexType row,
+        const IndexType* columns,
+        const RealType* values,
+        const IndexType numberOfElements,
+        const RealType& thisElementMultiplicator )
+{
+   
+}
+
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
 __cuda_callable__
-RealType getElementFast( const IndexType row,
-                      const IndexType column ) const;
-
-template< typename Real,
-          typename Organization >
-RealType getElement( const IndexType row,
-                  const IndexType column ) const;
+Real
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+getElementFast( const IndexType row,
+                const IndexType column ) const
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+Real
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+getElement( const IndexType row,
+            const IndexType column ) const
+{
+   
+}
 
 __cuda_callable__
 template< typename Real,
-          typename Organization >
-void getRowFast( const IndexType row,
-              IndexType* columns,
-              RealType* values ) const;
-
-template< typename Real,
-          typename Organization >
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+getRowFast( const IndexType row,
+            IndexType* columns,
+            RealType* values ) const
+{
+   
+}
+
+/*template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
 __cuda_callable__
-MatrixRow getRow( const IndexType rowIndex );
-
-template< typename Real,
-          typename Organization >
+MatrixRow 
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+getRow( const IndexType rowIndex )
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
 __cuda_callable__
-ConstMatrixRow getRow( const IndexType rowIndex ) const;
-
-template< typename Real,
-          typename Organization >
-template< typename Vector >
+ConstMatrixRow
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+getRow( const IndexType rowIndex ) const
+{
+   
+}*/
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Vector >
 __cuda_callable__
-typename Vector::RealType rowVectorProduct( const IndexType row,
-                                         const Vector& vector ) const;
-
-template< typename Real,
-          typename Organization >
+typename Vector::RealType
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+rowVectorProduct( const IndexType row,
+                  const Vector& vector ) const
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
 template< typename InVector,
        typename OutVector >
-void vectorProduct( const InVector& inVector,
-                 OutVector& outVector ) const;
+void
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+vectorProduct( const InVector& inVector,
+               OutVector& outVector ) const
 // TODO: add const RealType& multiplicator = 1.0 )
-
-template< typename Real,
-          typename Organization >
-template< typename Real2, typename Index2 >
-void addMatrix( const CSR< Real2, Device, Index2 >& matrix,
-             const RealType& matrixMultiplicator = 1.0,
-             const RealType& thisMatrixMultiplicator = 1.0 );
-
-template< typename Real,
-          typename Organization >
+{
+   
+}
+
+/*template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+template< typename Real2, template< typename, typename > class Segments2, typename Index2, typename RealAllocator2, typename IndexAllocator2 >
+void
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+addMatrix( const SparseMatrix< Real2, Segments2, Device, Index2, RealAllocator2, IndexAllocator2 >& matrix,
+           const RealType& matrixMultiplicator,
+           const RealType& thisMatrixMultiplicator )
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
 template< typename Real2, typename Index2 >
-void getTransposition( const CSR< Real2, Device, Index2 >& matrix,
-                    const RealType& matrixMultiplicator = 1.0 );
-
-template< typename Real,
-          typename Organization >
+void
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+getTransposition( const SparseMatrix< Real2, Device, Index2 >& matrix,
+                  const RealType& matrixMultiplicator )
+{
+   
+}*/
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
 template< typename Vector1, typename Vector2 >
-bool performSORIteration( const Vector1& b,
-                       const IndexType row,
-                       Vector2& x,
-                       const RealType& omega = 1.0 ) const;
+bool
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+performSORIteration( const Vector1& b,
+                     const IndexType row,
+                     Vector2& x,
+                     const RealType& omega ) const
+{
+   
+}
 
 // copy assignment
 template< typename Real,
-          typename Organization >
-CSR& operator=( const CSR& matrix );
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >&
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+operator=( const SparseMatrix& matrix )
+{
+   
+}
 
 // cross-device copy assignment
 template< typename Real,
-          typename Organization >
-template< typename Real2, typename Device2, typename Index2,
-       typename = typename Enabler< Device2 >::type >
-CSR& operator=( const CSR< Real2, Device2, Index2 >& matrix );
-
-template< typename Real,
-          typename Organization >
-void save( File& file ) const;
-
-template< typename Real,
-          typename Organization >
-void load( File& file );
-
-template< typename Real,
-          typename Organization >
-void save( const String& fileName ) const;
-
-template< typename Real,
-          typename Organization >
-void load( const String& fileName );
-
-template< typename Real,
-          typename Organization >
-void print( std::ostream& str ) const;
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Real2,
+             template< typename, typename > class Segments2,
+             typename Device2,
+             typename Index2,
+             typename RealAllocator2,
+             typename IndexAllocator2 >
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >&
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix )
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+save( File& file ) const
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+load( File& file )
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+save( const String& fileName ) const
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+load( const String& fileName )
+{
+   
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+print( std::ostream& str ) const
+{
+   
+}
 
 
    } //namespace Matrices
-- 
GitLab


From b85f28d3dd64e728dd9d4c66b4fc416fa0478b3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 2 Dec 2019 22:23:34 +0100
Subject: [PATCH 006/179] Implementing SparseMatrix.

---
 src/TNL/Containers/Segments/CSR.h             |  25 +++-
 src/TNL/Containers/Segments/CSR.hpp           | 123 ++++++++++++++----
 src/TNL/Matrices/Matrix_impl.h                |   1 +
 src/TNL/Matrices/SparseMatrix.h               |  16 ++-
 src/TNL/Matrices/SparseMatrix.hpp             | 118 +++++++++++++----
 src/TNL/Matrices/Sparse_impl.h                |   1 -
 .../Matrices/SparseMatrixTest_CSR_segments.h  |   4 +-
 7 files changed, 227 insertions(+), 61 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h
index f86def78e..2f194c76d 100644
--- a/src/TNL/Containers/Segments/CSR.h
+++ b/src/TNL/Containers/Segments/CSR.h
@@ -44,26 +44,43 @@ class CSR
       /**
        * \brief Number segments.
        */
+      __cuda_callable__
       IndexType getSize() const;
 
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      __cuda_callable__
       IndexType getStorageSize() const;
 
+      __cuda_callable__
       IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
 
+      __cuda_callable__
       void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
 
       /***
        * \brief Go over all segments and for each segment element call
-       * function 'f' with arguments 'args'
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
        */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
       template< typename Function, typename... Args >
       void forAll( Function& f, Args... args ) const;
 
+
       /***
        * \brief Go over all segments and perform a reduction in each of them.
        */
-      template< typename Fetch, typename Reduction, typename ResultKeeper, typename... Args >
-      void segmentsReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, Args... args );
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args );
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args );
+
 
    protected:
 
@@ -75,4 +92,4 @@ class CSR
    }  // namespace Conatiners
 } // namespace TNL
 
-#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Containers/Segments/CSR.hpp>
diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index ea45b40ba..84a0fcb34 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -11,7 +11,7 @@
 #pragma once
 
 #include <TNL/Containers/Vector.h>
-#include <TNL/Algorithms/ParalleFor.h>
+#include <TNL/Algorithms/ParallelFor.h>
 #include <TNL/Containers/Segments/CSR.h>
 
 namespace TNL {
@@ -43,68 +43,147 @@ CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) )
 
 template< typename Device,
           typename Index >
+   template< typename SizesHolder >
+void
 CSR< Device, Index >::
-void setSegmentsCount( const IndexType& size )
+setSizes( const SizesHolder& sizes )
 {
-   this->offsets.setSize( size + 1 );
+   this->offsets.setSize( sizes.getSize() + 1 );
+   auto view = this->offsets.getView( 0, sizes.getSize() );
+   view = sizes;
+   this->offsets.setElement( sizes.getSize(), 0 );
+   this->offsets.template scan< Algorithms::ScanType::Exclusive >();
 }
 
 template< typename Device,
           typename Index >
-   template< typename SizesHolder = OffsetsHolder >
+__cuda_callable__
+Index
 CSR< Device, Index >::
-void setSizes( const SizesHolder& sizes )
+getSize() const
 {
-   this->offsets.setSize( sizes.getSize() + 1 );
-   auto view = this->offsets.getView( 0, sizes.getSize() );
-   view = sizes;
-   this->offsets.setElement( sizes.getSize>(), 0 );
-   this->offsets.scan< Algorithms::ScanType::Exclusive >();
+   return this->offsets.getSize() - 1;
 }
 
 template< typename Device,
           typename Index >
+__cuda_callable__
+Index
 CSR< Device, Index >::
-Index getSize() const
+getSegmentSize( const IndexType segmentIdx ) const
+{
+   if( ! std::is_same< DeviceType, Devices::Host >::value )
+   {
+#ifdef __CUDA_ARCH__
+      return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ];
+#else
+      return offsets.getElement( segmentIdx + 1 ) - offsets.getElement( segmentIdx );
+#endif
+   }
+   return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ];
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+Index
+CSR< Device, Index >::
+getStorageSize() const
+{
+   if( ! std::is_same< DeviceType, Devices::Host >::value )
+   {
+#ifdef __CUDA_ARCH__
+      return offsets[ this->getSize() ];
+#else
+      return offsets.getElement( this->getSize() );
+#endif
+   }
+   return offsets[ this->getSize() ];
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+Index
+CSR< Device, Index >::
+getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
+{
+   if( ! std::is_same< DeviceType, Devices::Host >::value )
+   {
+#ifdef __CUDA_ARCH__
+      return offsets[ segmentIdx ] + localIdx;
+#else
+      return offsets.getElement( segmentIdx ) + localIdx;
+#endif
+   }
+   return offsets[ segmentIdx ] + localIdx;
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+void
+CSR< Device, Index >::
+getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
 {
-   return this->offsets.getSize() - 1;
 }
 
 template< typename Device,
           typename Index >
    template< typename Function, typename... Args >
+void
 CSR< Device, Index >::
-void forAll( Function& f, Args args ) const
+forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 {
    const auto offsetsView = this->offsets.getView();
-   auto f = [=] __cuda_callable__ ( const IndexType i, f, args ) {
+   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) {
       const IndexType begin = offsetsView[ i ];
       const IndexType end = offsetsView[ i + 1 ];
       for( IndexType j = begin; j < end; j++  )
-         f( i, j, args );
+         if( ! f( i, j, args... ) )
+            break;
    };
-   Algorithms::ParallelFor< Device >::exec( 0, this->getSize(), f );
+   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+}
+
+template< typename Device,
+          typename Index >
+   template< typename Function, typename... Args >
+void
+CSR< Device, Index >::
+forAll( Function& f, Args... args ) const
+{
+   this->forSegments( 0, this->getSize(), f, args... );
 }
 
 template< typename Device,
           typename Index >
    template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
 CSR< Device, Index >::
-void segmentsReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, Real zero, Args args )
+segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args )
 {
    const auto offsetsView = this->offsets.getView();
-   auto f = [=] __cuda_callable__ ( const IndexType i, f, args ) {
+   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) {
       const IndexType begin = offsetsView[ i ];
       const IndexType end = offsetsView[ i + 1 ];
       Real aux( zero );
       for( IndexType j = begin; j < end; j++  )
-         reduction( aux, fetch( i, j, args ) );
+         reduction( aux, fetch( i, j, args... ) );
       keeper( i, aux );
    };
-   Algorithms::ParallelFor< Device >::exec( 0, this->getSize(), f );
+   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
 }
 
-
+template< typename Device,
+          typename Index >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+CSR< Device, Index >::
+allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args )
+{
+   this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... );
+}
       } // namespace Segements
    }  // namespace Conatiners
-} // namespace TNL
\ No newline at end of file
+} // namespace TNL
diff --git a/src/TNL/Matrices/Matrix_impl.h b/src/TNL/Matrices/Matrix_impl.h
index 599e5ad33..a93c7a893 100644
--- a/src/TNL/Matrices/Matrix_impl.h
+++ b/src/TNL/Matrices/Matrix_impl.h
@@ -163,6 +163,7 @@ void Matrix< Real, Device, Index, RealAllocator >::reset()
 {
    this->rows = 0;
    this->columns = 0;
+   this->values.reset();
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index e266dc66d..7581ef090 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -55,9 +55,6 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 
       virtual String getSerializationTypeVirtual() const;
 
-      void setDimensions( const IndexType rows,
-                          const IndexType columns );
-
       void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
       IndexType getRowLength( const IndexType row ) const;
@@ -85,6 +82,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       bool setElement( const IndexType row,
                        const IndexType column,
                        const RealType& value );
+
       __cuda_callable__
       bool addElementFast( const IndexType row,
                            const IndexType column,
@@ -189,10 +187,18 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       void load( const String& fileName );
 
       void print( std::ostream& str ) const;
-      
+
+      __cuda_callable__
+      IndexType getPaddingIndex() const;
    protected:
 
-      ColumnsVectorType columnsVector;
+      ColumnsVectorType columnIndexes;
+
+      SegmentsType segments;
+
+      IndexAllocator indexAlloctor;
+
+      RealAllocator realAllocator;
 };
 
 }  // namespace Conatiners
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index abfc1619d..1ccb602ef 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -24,7 +24,7 @@ namespace Matrices {
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 SparseMatrix( const RealAllocatorType& realAllocator,
               const IndexAllocatorType& indexAllocator )
-   : Matrix< Real, Device, Index, RealAllocator >( realAllocator ), columnsVector( indexAllocator )
+   : Matrix< Real, Device, Index, RealAllocator >( realAllocator ), columnIndexes( indexAllocator )
 {
 }
 
@@ -36,7 +36,7 @@ template< typename Real,
           typename IndexAllocator >
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 SparseMatrix( const SparseMatrix& m )
-   : Matrix< Real, Device, Index, RealAllocator >( m ), columnsVector( m.columnsVector )
+   : Matrix< Real, Device, Index, RealAllocator >( m ), columnIndexes( m.columnIndexes )
 {
 }
 
@@ -48,7 +48,7 @@ template< typename Real,
           typename IndexAllocator >
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 SparseMatrix( const SparseMatrix&& m )
-   : Matrix< Real, Device, Index, RealAllocator >( std::move( m ) ), columnsVector( std::move( m.columnsVector ) )
+   : Matrix< Real, Device, Index, RealAllocator >( std::move( m ) ), columnIndexes( std::move( m.columnIndexes ) )
 {
 }
 
@@ -63,10 +63,10 @@ SparseMatrix( const IndexType rows,
               const IndexType columns,
               const RealAllocatorType& realAllocator,
               const IndexAllocatorType& indexAllocator )
-: Matrix< Real, Device, Index, RealAllocator >( rows, columns, realAllocator ), columnsVector( indexAllocator )
+: Matrix< Real, Device, Index, RealAllocator >( rows, columns, realAllocator ), columnIndexes( indexAllocator )
 {  
 }
-   
+
 template< typename Real,
           template< typename, typename > class Segments,
           typename Device,
@@ -96,20 +96,6 @@ getSerializationTypeVirtual() const
    return this->getSerializationType();
 }
 
-template< typename Real,
-          template< typename, typename > class Segments,
-          typename Device,
-          typename Index,
-          typename RealAllocator,
-          typename IndexAllocator >
-void
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
-setDimensions( const IndexType rows,
-               const IndexType columns )
-{
-   
-}
-
 template< typename Real,
           template< typename, typename > class Segments,
           typename Device,
@@ -120,7 +106,12 @@ void
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
 {
-   
+   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "Number of matrix rows does not fit with rowLengths vector size." );
+   this->segments.setSizes( rowLengths );
+   this->values.setSize( this->segments.getStorageSize() );
+   this->values = ( RealType ) 0;
+   this->columnIndexes.setSize( this->segments.getStorageSize() );
+   this->columnIndexes = this->getPaddingIndex();
 }
 
 template< typename Real,
@@ -188,7 +179,7 @@ void
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix )
 {
-   
+   Matrix< Real, Device, Index, RealAllocator >::setLike( matrix );
 }
 
 template< typename Real,
@@ -213,7 +204,9 @@ void
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 reset()
 {
-   
+   Matrix< Real, Device, Index >::reset();
+   this->columnIndexes.reset();
+
 }
 
 template< typename Real,
@@ -229,7 +222,7 @@ setElementFast( const IndexType row,
                 const IndexType column,
                 const RealType& value )
 {
-   
+   return this->addElementFast( row, column, value, 0.0 );
 }
 
 template< typename Real,
@@ -244,7 +237,7 @@ setElement( const IndexType row,
             const IndexType column,
             const RealType& value )
 {
-   
+   return this->addElement( row, column, value, 0.0 );
 }
 
 template< typename Real,
@@ -277,7 +270,56 @@ addElement( const IndexType row,
             const RealType& value,
             const RealType& thisElementMultiplicator )
 {
-   
+   TNL_ASSERT( row >= 0 && row < this->rows &&
+               column >= 0 && column < this->columns,
+               std::cerr << " row = " << row
+                    << " column = " << column
+                    << " this->rows = " << this->rows
+                    << " this->columns = " << this->columns );
+
+   const IndexType rowSize = this->segments.getSegmentSize( row );
+   IndexType col( this->getPaddingIndex() );
+   IndexType i;
+   IndexType globalIdx;
+   for( i = 0; i < rowSize; i++ )
+   {
+      globalIdx = this->segments.getGlobalIndex( row, i );
+      TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" );
+      col = this->columnIndexes.getElement( globalIdx );
+      if( col == column )
+      {
+         this->values.setElement( globalIdx, thisElementMultiplicator * this->values.getElement( globalIdx ) + value );
+         return true;
+      }
+      if( col == this->getPaddingIndex() || col > column )
+         break;
+   }
+   if( i == rowSize )
+      return false;
+   if( col == this->getPaddingIndex() )
+   {
+      this->columnIndexes.setElement( globalIdx, column );
+      this->values.setElement( globalIdx, value );
+      return true;
+   }
+   else
+   {
+      IndexType j = rowSize - 1;
+      while( j > i )
+      {
+         const IndexType globalIdx1 = this->segments.getGlobalIndex( row, j );
+         const IndexType globalIdx2 = this->segments.getGlobalIndex( row, j - 1 );
+         TNL_ASSERT_LT( globalIdx1, this->columnIndexes.getSize(), "" );
+         TNL_ASSERT_LT( globalIdx2, this->columnIndexes.getSize(), "" );
+         this->columnIndexes.setElement( globalIdx1, this->columnIndexes.getElement( globalIdx2 ) );
+         this->values.setElement( globalIdx1, this->values.getElement( globalIdx2 ) );
+         j--;
+      }
+      
+      this->columnIndexes.setElement( globalIdx, column );
+      this->values.setElement( globalIdx, value );
+      return true;
+   }
 }
 
 
@@ -377,16 +419,25 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 getElement( const IndexType row,
             const IndexType column ) const
 {
-   
+   const IndexType rowSize = this->segments.getSegmentSize( row );
+   for( IndexType i = 0; i < rowSize; i++ )
+   {
+      const IndexType globalIdx = this->segments.getGlobalIndex( row, i );
+      TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" );
+      const IndexType col = this->columnIndexes.getElement( globalIdx );
+      if( col == column )
+         return this->values.getElement( globalIdx );
+   }
+   return 0.0;
 }
 
-__cuda_callable__
 template< typename Real,
           template< typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
           typename IndexAllocator >
+__cuda_callable__
 void
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 getRowFast( const IndexType row,
@@ -604,6 +655,19 @@ print( std::ostream& str ) const
    
 }
 
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+__cuda_callable__
+Index
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+getPaddingIndex() const
+{
+   return -1;
+}
 
    } //namespace Matrices
 } // namespace  TNL
diff --git a/src/TNL/Matrices/Sparse_impl.h b/src/TNL/Matrices/Sparse_impl.h
index dda95e68b..889d92e62 100644
--- a/src/TNL/Matrices/Sparse_impl.h
+++ b/src/TNL/Matrices/Sparse_impl.h
@@ -75,7 +75,6 @@ template< typename Real,
 void Sparse< Real, Device, Index >::reset()
 {
    Matrix< Real, Device, Index >::reset();
-   this->values.reset();
    this->columnIndexes.reset();
 }
 
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
index 00654de3c..a14148151 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
@@ -108,7 +108,7 @@ TYPED_TEST( CSRMatrixTest, addElementTest )
     test_AddElement< CSRMatrixType >();
 }
 
-TYPED_TEST( CSRMatrixTest, setRowTest )
+/*TYPED_TEST( CSRMatrixTest, setRowTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
@@ -134,7 +134,7 @@ TYPED_TEST( CSRMatrixTest, printTest )
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
     test_Print< CSRMatrixType >();
-}
+}*/
 
 #endif
 
-- 
GitLab


From 176fedf929095d5b5ae9011b1fd313ac1e9d496a Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 3 Dec 2019 10:48:45 +0100
Subject: [PATCH 007/179] Implementing vector product in SparseMatrix.

---
 src/TNL/Containers/Segments/CSR.hpp           |   5 +-
 src/TNL/Matrices/SparseMatrix.h               |  16 ++-
 src/TNL/Matrices/SparseMatrix.hpp             | 107 +++++++++---------
 .../Matrices/SparseMatrixTest_CSR_segments.h  |   6 +-
 4 files changed, 65 insertions(+), 69 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index 84a0fcb34..4dcccac24 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -163,11 +163,12 @@ void
 CSR< Device, Index >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args )
 {
-   const auto offsetsView = this->offsets.getView();
+   using RealType = decltype( fetch( IndexType(), IndexType() ) );
+   auto offsetsView = this->offsets.getConstView();
    auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) {
       const IndexType begin = offsetsView[ i ];
       const IndexType end = offsetsView[ i + 1 ];
-      Real aux( zero );
+      RealType aux( zero );
       for( IndexType j = begin; j < end; j++  )
          reduction( aux, fetch( i, j, args... ) );
       keeper( i, aux );
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 7581ef090..6b6a58f9a 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -133,22 +133,20 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
                        IndexType* columns,
                        RealType* values ) const;
 
-      /*__cuda_callable__
-      MatrixRow getRow( const IndexType rowIndex );
-
-      __cuda_callable__
-      ConstMatrixRow getRow( const IndexType rowIndex ) const;*/
-
       template< typename Vector >
       __cuda_callable__
       typename Vector::RealType rowVectorProduct( const IndexType row,
                                                   const Vector& vector ) const;
 
+      /***
+       * \brief This method computes outVector = matrixMultiplicator * ( *this ) * inVector + inVectorAddition * inVector
+       */
       template< typename InVector,
                 typename OutVector >
       void vectorProduct( const InVector& inVector,
-                          OutVector& outVector ) const;
-      // TODO: add const RealType& multiplicator = 1.0 )
+                          OutVector& outVector,
+                          const RealType& matrixMultiplicator = 1.0,
+                          const RealType& inVectorAddition = 0.0 ) const;
 
       /*template< typename Real2, typename Index2 >
       void addMatrix( const SparseMatrix< Real2, Segments, Device, Index2 >& matrix,
@@ -170,7 +168,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       SparseMatrix& operator=( const SparseMatrix& matrix );
 
       // cross-device copy assignment
-      template< typename Real2, 
+      template< typename Real2,
                 template< typename, typename > class Segments2,
                 typename Device2,
                 typename Index2,
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 1ccb602ef..c89aeac17 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -64,7 +64,7 @@ SparseMatrix( const IndexType rows,
               const RealAllocatorType& realAllocator,
               const IndexAllocatorType& indexAllocator )
 : Matrix< Real, Device, Index, RealAllocator >( rows, columns, realAllocator ), columnIndexes( indexAllocator )
-{  
+{
 }
 
 template< typename Real,
@@ -124,7 +124,7 @@ Index
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 getRowLength( const IndexType row ) const
 {
-   
+
 }
 
 template< typename Real,
@@ -138,7 +138,7 @@ Index
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 getRowLengthFast( const IndexType row ) const
 {
-   
+
 }
 
 template< typename Real,
@@ -151,7 +151,7 @@ Index
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 getNonZeroRowLength( const IndexType row ) const
 {
-   
+
 }
 
 template< typename Real,
@@ -165,7 +165,7 @@ Index
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 getNonZeroRowLengthFast( const IndexType row ) const
 {
-   
+
 }
 
 template< typename Real,
@@ -254,7 +254,7 @@ addElementFast( const IndexType row,
                 const RealType& value,
                 const RealType& thisElementMultiplicator )
 {
-   
+
 }
 
 template< typename Real,
@@ -315,7 +315,7 @@ addElement( const IndexType row,
          this->values.setElement( globalIdx1, this->values.getElement( globalIdx2 ) );
          j--;
       }
-      
+
       this->columnIndexes.setElement( globalIdx, column );
       this->values.setElement( globalIdx, value );
       return true;
@@ -337,7 +337,6 @@ setRowFast( const IndexType row,
             const RealType* values,
             const IndexType elements )
 {
-   
 }
 
 template< typename Real,
@@ -353,7 +352,19 @@ setRow( const IndexType row,
         const RealType* values,
         const IndexType elements )
 {
-   
+   const IndexType rowLength = this->segments.getSegmentSize( row );
+   if( elements > rowLength )
+      return false;
+
+   for( IndexType i = 0; i < elements; i++ )
+   {
+      const IndexType globalIdx = this->segments.getGlobalIndex( row, i );
+      this->columnIndexes.setElement( globalIdx, columnIndexes[ i ] );
+      this->values.setElement( globalIdx, values[ i ] );
+   }
+   for( IndexType i = elements; i < rowLength; i++ )
+      this->columnIndexes.setElement( this->segments.getGlobalIndex( row, i ), this->getPaddingIndex() );
+   return true;
 }
 
 
@@ -372,7 +383,7 @@ addRowFast( const IndexType row,
             const IndexType numberOfElements,
             const RealType& thisElementMultiplicator )
 {
-   
+
 }
 
 template< typename Real,
@@ -389,7 +400,7 @@ addRow( const IndexType row,
         const IndexType numberOfElements,
         const RealType& thisElementMultiplicator )
 {
-   
+
 }
 
 
@@ -405,7 +416,7 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 getElementFast( const IndexType row,
                 const IndexType column ) const
 {
-   
+
 }
 
 template< typename Real,
@@ -444,37 +455,9 @@ getRowFast( const IndexType row,
             IndexType* columns,
             RealType* values ) const
 {
-   
-}
 
-/*template< typename Real,
-          template< typename, typename > class Segments,
-          typename Device,
-          typename Index,
-          typename RealAllocator,
-          typename IndexAllocator >
-__cuda_callable__
-MatrixRow 
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
-getRow( const IndexType rowIndex )
-{
-   
 }
 
-template< typename Real,
-          template< typename, typename > class Segments,
-          typename Device,
-          typename Index,
-          typename RealAllocator,
-          typename IndexAllocator >
-__cuda_callable__
-ConstMatrixRow
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
-getRow( const IndexType rowIndex ) const
-{
-   
-}*/
-
 template< typename Real,
           template< typename, typename > class Segments,
           typename Device,
@@ -488,7 +471,7 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 rowVectorProduct( const IndexType row,
                   const Vector& vector ) const
 {
-   
+
 }
 
 template< typename Real,
@@ -502,10 +485,24 @@ template< typename InVector,
 void
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 vectorProduct( const InVector& inVector,
-               OutVector& outVector ) const
-// TODO: add const RealType& multiplicator = 1.0 )
-{
-   
+               OutVector& outVector,
+               const RealType& matrixMultiplicator,
+               const RealType& inVectorAddition ) const
+{
+   auto inVectorView = inVector.getConstView();
+   auto outVectorView = outVector.getView();
+   auto valuesView = this->values.getConstView();
+   auto columnIndexesView = this->columnIndexes.getConstView();
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset ) -> RealType {
+      return valuesView[ offset ] * inVectorView[ columnIndexesView[ offset ] ];
+   };
+   auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+      sum += value;
+   };
+   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      outVectorView[ row ] = value;
+   };
+   const_cast< SegmentsType* >( &this->segments )->segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
 }
 
 /*template< typename Real,
@@ -521,7 +518,7 @@ addMatrix( const SparseMatrix< Real2, Segments2, Device, Index2, RealAllocator2,
            const RealType& matrixMultiplicator,
            const RealType& thisMatrixMultiplicator )
 {
-   
+
 }
 
 template< typename Real,
@@ -536,7 +533,7 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 getTransposition( const SparseMatrix< Real2, Device, Index2 >& matrix,
                   const RealType& matrixMultiplicator )
 {
-   
+
 }*/
 
 template< typename Real,
@@ -553,7 +550,7 @@ performSORIteration( const Vector1& b,
                      Vector2& x,
                      const RealType& omega ) const
 {
-   
+
 }
 
 // copy assignment
@@ -567,7 +564,7 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >&
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 operator=( const SparseMatrix& matrix )
 {
-   
+
 }
 
 // cross-device copy assignment
@@ -587,7 +584,7 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >&
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix )
 {
-   
+
 }
 
 template< typename Real,
@@ -600,7 +597,7 @@ void
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 save( File& file ) const
 {
-   
+
 }
 
 template< typename Real,
@@ -613,7 +610,7 @@ void
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 load( File& file )
 {
-   
+
 }
 
 template< typename Real,
@@ -626,7 +623,7 @@ void
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 save( const String& fileName ) const
 {
-   
+
 }
 
 template< typename Real,
@@ -639,7 +636,7 @@ void
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 load( const String& fileName )
 {
-   
+
 }
 
 template< typename Real,
@@ -652,7 +649,7 @@ void
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 print( std::ostream& str ) const
 {
-   
+
 }
 
 template< typename Real,
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
index a14148151..4443d7f6c 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
@@ -41,7 +41,7 @@ using CSRMatrixTypes = ::testing::Types
     TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::CSR, TNL::Devices::Host, long  >,
     TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::CSR, TNL::Devices::Host, long  >,
     TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::CSR, TNL::Devices::Host, long  >
-#ifdef HAVE_CUDA                                                                             
+#ifdef HAVE_CUDA
    ,TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >,
     TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >,
     TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >,
@@ -108,7 +108,7 @@ TYPED_TEST( CSRMatrixTest, addElementTest )
     test_AddElement< CSRMatrixType >();
 }
 
-/*TYPED_TEST( CSRMatrixTest, setRowTest )
+TYPED_TEST( CSRMatrixTest, setRowTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
@@ -122,7 +122,7 @@ TYPED_TEST( CSRMatrixTest, vectorProductTest )
     test_VectorProduct< CSRMatrixType >();
 }
 
-TYPED_TEST( CSRMatrixTest, saveAndLoadTest )
+/*TYPED_TEST( CSRMatrixTest, saveAndLoadTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
-- 
GitLab


From bc152d3f45b048c90a35a0d8d8930a67fbf95ad0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Tue, 3 Dec 2019 11:13:28 +0100
Subject: [PATCH 008/179] Fixed const in segmentsReduction

---
 src/TNL/Containers/Segments/CSR.h   | 2 +-
 src/TNL/Containers/Segments/CSR.hpp | 6 +++---
 src/TNL/Matrices/SparseMatrix.hpp   | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h
index 2f194c76d..92b4f3949 100644
--- a/src/TNL/Containers/Segments/CSR.h
+++ b/src/TNL/Containers/Segments/CSR.h
@@ -76,7 +76,7 @@ class CSR
        * \brief Go over all segments and perform a reduction in each of them.
        */
       template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
-      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args );
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
 
       template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
       void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args );
diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index 4dcccac24..e2fd099ae 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -161,11 +161,11 @@ template< typename Device,
    template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
 void
 CSR< Device, Index >::
-segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args )
+segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
    using RealType = decltype( fetch( IndexType(), IndexType() ) );
-   auto offsetsView = this->offsets.getConstView();
-   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) {
+   const auto offsetsView = this->offsets.getConstView();
+   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
       const IndexType begin = offsetsView[ i ];
       const IndexType end = offsetsView[ i + 1 ];
       RealType aux( zero );
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index c89aeac17..067f36001 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -502,7 +502,7 @@ vectorProduct( const InVector& inVector,
    auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
       outVectorView[ row ] = value;
    };
-   const_cast< SegmentsType* >( &this->segments )->segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
+   this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
 }
 
 /*template< typename Real,
-- 
GitLab


From ff7c9054e2205808d109d3349efefd748b2f809b Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 3 Dec 2019 12:58:55 +0100
Subject: [PATCH 009/179] All tests passed for SparseMatrix using Segments.

---
 src/TNL/Containers/Segments/CSR.h             |  5 +++-
 src/TNL/Containers/Segments/CSR.hpp           | 21 ++++++++++++++-
 src/TNL/Matrices/SparseMatrix.hpp             | 27 +++++++++++++++----
 .../Matrices/SparseMatrixTest_CSR_segments.h  |  6 ++---
 4 files changed, 49 insertions(+), 10 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h
index 92b4f3949..e3eff2342 100644
--- a/src/TNL/Containers/Segments/CSR.h
+++ b/src/TNL/Containers/Segments/CSR.h
@@ -79,8 +79,11 @@ class CSR
       void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
 
       template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
-      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args );
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
 
+      void save( File& file ) const;
+
+      void load( File& file );
 
    protected:
 
diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index e2fd099ae..c99611958 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -181,10 +181,29 @@ template< typename Device,
    template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
 void
 CSR< Device, Index >::
-allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args )
+allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
    this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... );
 }
+
+template< typename Device,
+          typename Index >
+void
+CSR< Device, Index >::
+save( File& file ) const
+{
+   file << this->offsets;
+}
+
+template< typename Device,
+          typename Index >
+void
+CSR< Device, Index >::
+load( File& file )
+{
+   file >> this->offsets;
+}
+
       } // namespace Segements
    }  // namespace Conatiners
 } // namespace TNL
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 067f36001..a43ddba82 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -597,7 +597,9 @@ void
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 save( File& file ) const
 {
-
+   Matrix< RealType, DeviceType, IndexType >::save( file );
+   file << this->columnIndexes;
+   this->segments.save( file );
 }
 
 template< typename Real,
@@ -610,7 +612,9 @@ void
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 load( File& file )
 {
-
+   Matrix< RealType, DeviceType, IndexType >::load( file );
+   file >> this->columnIndexes;
+   this->segments.load( file );
 }
 
 template< typename Real,
@@ -623,7 +627,7 @@ void
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 save( const String& fileName ) const
 {
-
+   Object::save( fileName );
 }
 
 template< typename Real,
@@ -636,7 +640,7 @@ void
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 load( const String& fileName )
 {
-
+   Object::load( fileName );
 }
 
 template< typename Real,
@@ -649,7 +653,20 @@ void
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 print( std::ostream& str ) const
 {
-
+   for( IndexType row = 0; row < this->getRows(); row++ )
+   {
+      str <<"Row: " << row << " -> ";
+      const IndexType rowLength = this->segments.getSegmentSize( row );
+      for( IndexType i = 0; i < rowLength; i++ )
+      {
+         const IndexType globalIdx = this->segments.getGlobalIndex( row, i );
+         const IndexType column = this->columnIndexes.getElement( globalIdx );
+         if( column == this->getPaddingIndex() )
+            break;
+         str << " Col:" << column << "->" << this->values.getElement( globalIdx ) << "\t";
+      }
+      str << std::endl;
+   }
 }
 
 template< typename Real,
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
index 4443d7f6c..a738af0e2 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
@@ -122,11 +122,11 @@ TYPED_TEST( CSRMatrixTest, vectorProductTest )
     test_VectorProduct< CSRMatrixType >();
 }
 
-/*TYPED_TEST( CSRMatrixTest, saveAndLoadTest )
+TYPED_TEST( CSRMatrixTest, saveAndLoadTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
-    test_SaveAndLoad< CSRMatrixType >( "test_SparseMatrixTest_CSR" );
+    test_SaveAndLoad< CSRMatrixType >( "test_SparseMatrixTest_CSR_segments" );
 }
 
 TYPED_TEST( CSRMatrixTest, printTest )
@@ -134,7 +134,7 @@ TYPED_TEST( CSRMatrixTest, printTest )
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
     test_Print< CSRMatrixType >();
-}*/
+}
 
 #endif
 
-- 
GitLab


From 2bd090cccf878ce1cede67d855d15e0ce0b3e6d2 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 3 Dec 2019 20:46:36 +0100
Subject: [PATCH 010/179] Added Ellpack segments.

---
 src/TNL/Containers/Segments/CSR.h       |   5 +-
 src/TNL/Containers/Segments/CSR.hpp     |   2 +-
 src/TNL/Containers/Segments/Ellpack.h   |  98 ++++++++++++
 src/TNL/Containers/Segments/Ellpack.hpp | 190 ++++++++++++++++++++++++
 4 files changed, 290 insertions(+), 5 deletions(-)
 create mode 100644 src/TNL/Containers/Segments/Ellpack.h
 create mode 100644 src/TNL/Containers/Segments/Ellpack.hpp

diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h
index e3eff2342..52ca36e22 100644
--- a/src/TNL/Containers/Segments/CSR.h
+++ b/src/TNL/Containers/Segments/CSR.h
@@ -16,7 +16,6 @@ namespace TNL {
    namespace Containers {
       namespace Segments {
 
-
 template< typename Device,
           typename Index >
 class CSR
@@ -36,7 +35,7 @@ class CSR
       CSR( const CSR&& segments );
 
       /**
-       * \brief Set sizes of particular segmenets.
+       * \brief Set sizes of particular segments.
        */
       template< typename SizesHolder = OffsetsHolder >
       void setSizes( const SizesHolder& sizes );
@@ -88,9 +87,7 @@ class CSR
    protected:
 
       OffsetsHolder offsets;
-
 };
-
       } // namespace Segements
    }  // namespace Conatiners
 } // namespace TNL
diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index c99611958..ecd52190c 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -136,7 +136,7 @@ CSR< Device, Index >::
 forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 {
    const auto offsetsView = this->offsets.getView();
-   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) {
+   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
       const IndexType begin = offsetsView[ i ];
       const IndexType end = offsetsView[ i + 1 ];
       for( IndexType j = begin; j < end; j++  )
diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h
new file mode 100644
index 000000000..49f859afb
--- /dev/null
+++ b/src/TNL/Containers/Segments/Ellpack.h
@@ -0,0 +1,98 @@
+/***************************************************************************
+                          Ellpack.h -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Device,
+          typename Index,
+          int Alignment = 32 >
+class Ellpack
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
+      static constexpr int getAlignment() { return Alignment; }
+
+      Ellpack();
+
+      Ellpack( const Vector< IndexType, DeviceType, IndexType >& sizes );
+
+      Ellpack( const Ellpack& segments );
+
+      Ellpack( const Ellpack&& segments );
+
+      /**
+       * \brief Set sizes of particular segments.
+       */
+      template< typename SizesHolder = OffsetsHolder >
+      void setSizes( const SizesHolder& sizes );
+
+      /**
+       * \brief Number segments.
+       */
+      __cuda_callable__
+      IndexType getSize() const;
+
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      __cuda_callable__
+      IndexType getStorageSize() const;
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+   protected:
+
+      IndexType segmentSize, size, alignedSize;
+};
+
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
+
+#include <TNL/Containers/Segments/Ellpack.hpp>
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
new file mode 100644
index 000000000..0b6240514
--- /dev/null
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -0,0 +1,190 @@
+/***************************************************************************
+                          Ellpack.hpp -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+
+template< typename Device,
+          typename Index >
+Ellpack< Device, Index >::
+Ellpack() : size( 0 ), rowLength( 0 )
+{
+}
+
+template< typename Device,
+          typename Index >
+Ellpack< Device, Index >::
+Ellpack( const Ellpack& ellpack ) : offsets( ellpack.offsets )
+{
+}
+
+template< typename Device,
+          typename Index >
+Ellpack< Device, Index >::
+Ellpack( const Ellpack&& ellpack ) : offsets( std::move( ellpack.offsets ) )
+{
+
+}
+
+template< typename Device,
+          typename Index >
+   template< typename SizesHolder >
+void
+Ellpack< Device, Index >::
+setSizes( const SizesHolder& sizes )
+{
+   this->segmentSize = max( sizes );
+   this->size = sizes.getSize();
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+Index
+Ellpack< Device, Index >::
+getSize() const
+{
+   return this->offsets.getSize() - 1;
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+Index
+Ellpack< Device, Index >::
+getSegmentSize( const IndexType segmentIdx ) const
+{
+   return this->segmentSize;
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+Index
+Ellpack< Device, Index >::
+getStorageSize() const
+{
+   return this->size * this->segmentSize;
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+Index
+Ellpack< Device, Index >::
+getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
+{
+   if( ! std::is_same< DeviceType, Devices::Host >::value )
+   {
+#ifdef __CUDA_ARCH__
+      return offsets[ segmentIdx ] + localIdx;
+#else
+      return offsets.getElement( segmentIdx ) + localIdx;
+#endif
+   }
+   return offsets[ segmentIdx ] + localIdx;
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+void
+Ellpack< Device, Index >::
+getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
+{
+}
+
+template< typename Device,
+          typename Index >
+   template< typename Function, typename... Args >
+void
+Ellpack< Device, Index >::
+forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
+{
+   const auto offsetsView = this->offsets.getView();
+   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) {
+      const IndexType begin = offsetsView[ i ];
+      const IndexType end = offsetsView[ i + 1 ];
+      for( IndexType j = begin; j < end; j++  )
+         if( ! f( i, j, args... ) )
+            break;
+   };
+   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+}
+
+template< typename Device,
+          typename Index >
+   template< typename Function, typename... Args >
+void
+Ellpack< Device, Index >::
+forAll( Function& f, Args... args ) const
+{
+   this->forSegments( 0, this->getSize(), f, args... );
+}
+
+template< typename Device,
+          typename Index >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+Ellpack< Device, Index >::
+segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   using RealType = decltype( fetch( IndexType(), IndexType() ) );
+   const auto offsetsView = this->offsets.getConstView();
+   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
+      const IndexType begin = offsetsView[ i ];
+      const IndexType end = offsetsView[ i + 1 ];
+      RealType aux( zero );
+      for( IndexType j = begin; j < end; j++  )
+         reduction( aux, fetch( i, j, args... ) );
+      keeper( i, aux );
+   };
+   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+}
+
+template< typename Device,
+          typename Index >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+Ellpack< Device, Index >::
+allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... );
+}
+
+template< typename Device,
+          typename Index >
+void
+Ellpack< Device, Index >::
+save( File& file ) const
+{
+   file << this->offsets;
+}
+
+template< typename Device,
+          typename Index >
+void
+Ellpack< Device, Index >::
+load( File& file )
+{
+   file >> this->offsets;
+}
+
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
-- 
GitLab


From 3abf57d2212c9290f44fe2521d9c481370d6964c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Tue, 3 Dec 2019 21:36:17 +0100
Subject: [PATCH 011/179] Implementing Ellpack segments.

---
 src/TNL/Containers/Segments/CSR.hpp           |   2 +-
 src/TNL/Containers/Segments/Ellpack.h         |   1 +
 src/TNL/Containers/Segments/Ellpack.hpp       | 156 ++++++++++++------
 src/UnitTests/Matrices/CMakeLists.txt         |   6 +
 .../Matrices/SparseMatrixTest_CSR_segments.h  |   4 +-
 .../SparseMatrixTest_Ellpack_segments.cpp     |   1 +
 .../SparseMatrixTest_Ellpack_segments.cu      |   1 +
 .../SparseMatrixTest_Ellpack_segments.h       | 141 ++++++++++++++++
 8 files changed, 255 insertions(+), 57 deletions(-)
 create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cpp
 create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu
 create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h

diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index ecd52190c..b40524e5e 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -204,6 +204,6 @@ load( File& file )
    file >> this->offsets;
 }
 
-      } // namespace Segements
+      } // namespace Segments
    }  // namespace Conatiners
 } // namespace TNL
diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h
index 49f859afb..772566f51 100644
--- a/src/TNL/Containers/Segments/Ellpack.h
+++ b/src/TNL/Containers/Segments/Ellpack.h
@@ -18,6 +18,7 @@ namespace TNL {
 
 template< typename Device,
           typename Index,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
           int Alignment = 32 >
 class Ellpack
 {
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index 0b6240514..42d7eb8c1 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -20,128 +20,170 @@ namespace TNL {
 
 
 template< typename Device,
-          typename Index >
-Ellpack< Device, Index >::
-Ellpack() : size( 0 ), rowLength( 0 )
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack()
+   : segmentSize( 0 ), size( 0 ), alignedSize( 0 )
 {
 }
 
 template< typename Device,
-          typename Index >
-Ellpack< Device, Index >::
-Ellpack( const Ellpack& ellpack ) : offsets( ellpack.offsets )
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack( const Ellpack& ellpack )
+   : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize )
 {
 }
 
 template< typename Device,
-          typename Index >
-Ellpack< Device, Index >::
-Ellpack( const Ellpack&& ellpack ) : offsets( std::move( ellpack.offsets ) )
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack( const Ellpack&& ellpack )
+   : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize )
 {
-
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
    template< typename SizesHolder >
 void
-Ellpack< Device, Index >::
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
 setSizes( const SizesHolder& sizes )
 {
    this->segmentSize = max( sizes );
    this->size = sizes.getSize();
+   if( RowMajorOrder )
+      this->alignedSize = this->size;
+   else
+      this->alignedSize = roundUpDivision( size / this->getAlignment() ) * this->getAlignment();
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
 __cuda_callable__
 Index
-Ellpack< Device, Index >::
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
 getSize() const
 {
-   return this->offsets.getSize() - 1;
+   return this->size;
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
 __cuda_callable__
 Index
-Ellpack< Device, Index >::
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
 getSegmentSize( const IndexType segmentIdx ) const
 {
    return this->segmentSize;
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
 __cuda_callable__
 Index
-Ellpack< Device, Index >::
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
 getStorageSize() const
 {
-   return this->size * this->segmentSize;
+   return this->alignedSize * this->segmentSize;
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
 __cuda_callable__
 Index
-Ellpack< Device, Index >::
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
 getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
 {
-   if( ! std::is_same< DeviceType, Devices::Host >::value )
-   {
-#ifdef __CUDA_ARCH__
-      return offsets[ segmentIdx ] + localIdx;
-#else
-      return offsets.getElement( segmentIdx ) + localIdx;
-#endif
-   }
-   return offsets[ segmentIdx ] + localIdx;
+   if( RowMajorOrder )
+      return segmentIdx * this->segmentSize + localIdx;
+   else
+      return segmentIdx + this->alignedSize * localIdx;
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
 __cuda_callable__
 void
-Ellpack< Device, Index >::
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
 getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
 {
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
    template< typename Function, typename... Args >
 void
-Ellpack< Device, Index >::
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
 forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 {
    const auto offsetsView = this->offsets.getView();
-   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) {
-      const IndexType begin = offsetsView[ i ];
-      const IndexType end = offsetsView[ i + 1 ];
-      for( IndexType j = begin; j < end; j++  )
-         if( ! f( i, j, args... ) )
-            break;
-   };
-   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   if( RowMajorOrder )
+   {
+      const IndexType segmentSize = this->segmentSize;
+      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) {
+         const IndexType begin = i * segmentSize;
+         const IndexType end = begin + segmentSize;
+         for( IndexType j = begin; j < end; j++  )
+            if( ! f( i, j, args... ) )
+               break;
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+   else
+   {
+      const IndexType storageSize = this->getStorageSize();
+      const IndexType alignedSize = this->alignedSize;
+      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) {
+         const IndexType begin = i;
+         const IndexType end = storageSize;
+         for( IndexType j = begin; j < end; j += alignedSize )
+            if( ! f( i, j, args... ) )
+               break;
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
    template< typename Function, typename... Args >
 void
-Ellpack< Device, Index >::
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
 forAll( Function& f, Args... args ) const
 {
    this->forSegments( 0, this->getSize(), f, args... );
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
    template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
 void
-Ellpack< Device, Index >::
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
    using RealType = decltype( fetch( IndexType(), IndexType() ) );
@@ -158,33 +200,39 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
    template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
 void
-Ellpack< Device, Index >::
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
 allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
    this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... );
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
 void
-Ellpack< Device, Index >::
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
 save( File& file ) const
 {
    file << this->offsets;
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
 void
-Ellpack< Device, Index >::
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
 load( File& file )
 {
    file >> this->offsets;
 }
 
-      } // namespace Segements
+      } // namespace Segments
    }  // namespace Conatiners
 } // namespace TNL
diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index f278934a6..996dd0430 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -74,6 +74,11 @@ ELSE(  BUILD_CUDA )
    TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR_segments PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} )
 
+   ADD_EXECUTABLE( SparseMatrixTest_Ellpack_segments SparseMatrixTest_Ellpack_segments.cpp )
+   TARGET_COMPILE_OPTIONS( SparseMatrixTest_Ellpack_segments PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack_segments ${GTEST_BOTH_LIBRARIES} )
+
+
 ENDIF( BUILD_CUDA )
 
 
@@ -92,6 +97,7 @@ ADD_TEST( SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixT
 ####
 # Segments tests
 ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( SparseMatrixTest_Ellpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} )
 
 if( ${BUILD_MPI} )
    if( BUILD_CUDA )
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
index a738af0e2..b53358469 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
@@ -1,8 +1,8 @@
 /***************************************************************************
                           SparseMatrixTest_CSR.h -  description
                              -------------------
-    begin                : Nov 2, 2018
-    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    begin                : Dec 2, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cpp b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cpp
new file mode 100644
index 000000000..63219e9b0
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cpp
@@ -0,0 +1 @@
+#include "SparseMatrixTest_Ellpack_segments.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu
new file mode 100644
index 000000000..63219e9b0
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu
@@ -0,0 +1 @@
+#include "SparseMatrixTest_Ellpack_segments.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
new file mode 100644
index 000000000..79cdf06cf
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
@@ -0,0 +1,141 @@
+/***************************************************************************
+                          SparseMatrixTest_Ellpack.h -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Matrices/SparseMatrix.h>
+
+
+#include "SparseMatrixTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+// test fixture for typed tests
+template< typename Matrix >
+class EllpackMatrixTest : public ::testing::Test
+{
+protected:
+   using EllpackMatrixType = Matrix;
+};
+
+// types for which MatrixTest is instantiated
+using EllpackMatrixTypes = ::testing::Types
+<
+    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long  >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long  >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long  >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long  >
+#ifdef HAVE_CUDA
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long  >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long  >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long  >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long  >
+#endif
+>;
+
+TYPED_TEST_SUITE( EllpackMatrixTest, EllpackMatrixTypes);
+
+TYPED_TEST( EllpackMatrixTest, setDimensionsTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SetDimensions< EllpackMatrixType >();
+}
+
+//TYPED_TEST( EllpackMatrixTest, setCompressedRowLengthsTest )
+//{
+////    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+//
+////    test_SetCompressedRowLengths< EllpackMatrixType >();
+//
+//    bool testRan = false;
+//    EXPECT_TRUE( testRan );
+//    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+//    std::cout << "      This test is dependent on the input format. \n";
+//    std::cout << "      Almost every format allocates elements per row differently.\n\n";
+//    std::cout << "\n    TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n";
+//}
+
+TYPED_TEST( EllpackMatrixTest, setLikeTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SetLike< EllpackMatrixType, EllpackMatrixType >();
+}
+
+TYPED_TEST( EllpackMatrixTest, resetTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_Reset< EllpackMatrixType >();
+}
+
+TYPED_TEST( EllpackMatrixTest, setElementTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SetElement< EllpackMatrixType >();
+}
+
+TYPED_TEST( EllpackMatrixTest, addElementTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_AddElement< EllpackMatrixType >();
+}
+
+TYPED_TEST( EllpackMatrixTest, setRowTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SetRow< EllpackMatrixType >();
+}
+
+TYPED_TEST( EllpackMatrixTest, vectorProductTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_VectorProduct< EllpackMatrixType >();
+}
+
+TYPED_TEST( EllpackMatrixTest, saveAndLoadTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SaveAndLoad< EllpackMatrixType >( "test_SparseMatrixTest_Ellpack_segments" );
+}
+
+TYPED_TEST( EllpackMatrixTest, printTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_Print< EllpackMatrixType >();
+}
+
+#endif
+
+#include "../main.h"
-- 
GitLab


From 322ac56e385cc398fb44a2cef78067d2cdaf82b6 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 4 Dec 2019 18:08:16 +0100
Subject: [PATCH 012/179] Ellpack segments work well.

---
 src/TNL/Containers/Segments/Ellpack.h         |  1 +
 src/TNL/Containers/Segments/Ellpack.hpp       | 50 +++++++++++-----
 src/TNL/Matrices/SparseMatrix.hpp             | 12 ++--
 src/UnitTests/Matrices/CMakeLists.txt         |  3 +
 .../SparseMatrixTest_Ellpack_segments.h       | 59 +++++++++++--------
 5 files changed, 83 insertions(+), 42 deletions(-)

diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h
index 772566f51..dc1a717b3 100644
--- a/src/TNL/Containers/Segments/Ellpack.h
+++ b/src/TNL/Containers/Segments/Ellpack.h
@@ -28,6 +28,7 @@ class Ellpack
       using IndexType = Index;
       using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
       static constexpr int getAlignment() { return Alignment; }
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
 
       Ellpack();
 
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index 42d7eb8c1..8a23693ec 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -63,7 +63,7 @@ setSizes( const SizesHolder& sizes )
    if( RowMajorOrder )
       this->alignedSize = this->size;
    else
-      this->alignedSize = roundUpDivision( size / this->getAlignment() ) * this->getAlignment();
+      this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment();
 }
 
 template< typename Device,
@@ -186,17 +186,35 @@ void
 Ellpack< Device, Index, RowMajorOrder, Alignment >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType() ) );
-   const auto offsetsView = this->offsets.getConstView();
-   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
-      const IndexType begin = offsetsView[ i ];
-      const IndexType end = offsetsView[ i + 1 ];
-      RealType aux( zero );
-      for( IndexType j = begin; j < end; j++  )
-         reduction( aux, fetch( i, j, args... ) );
-      keeper( i, aux );
-   };
-   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   if( RowMajorOrder )
+   {
+      using RealType = decltype( fetch( IndexType(), IndexType() ) );
+      const IndexType segmentSize = this->segmentSize;
+      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
+         const IndexType begin = i * segmentSize;
+         const IndexType end = begin + segmentSize;
+         RealType aux( zero );
+         for( IndexType j = begin; j < end; j++  )
+            reduction( aux, fetch( i, j, args... ) );
+         keeper( i, aux );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+   else
+   {
+      using RealType = decltype( fetch( IndexType(), IndexType() ) );
+      const IndexType storageSize = this->getStorageSize();
+      const IndexType alignedSize = this->alignedSize;
+      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
+         const IndexType begin = i;
+         const IndexType end = storageSize;
+         RealType aux( zero );
+         for( IndexType j = begin; j < end; j += alignedSize  )
+            reduction( aux, fetch( i, j, args... ) );
+         keeper( i, aux );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
 }
 
 template< typename Device,
@@ -219,7 +237,9 @@ void
 Ellpack< Device, Index, RowMajorOrder, Alignment >::
 save( File& file ) const
 {
-   file << this->offsets;
+   file.save( &segmentSize );
+   file.save( &size );
+   file.save( &alignedSize );
 }
 
 template< typename Device,
@@ -230,7 +250,9 @@ void
 Ellpack< Device, Index, RowMajorOrder, Alignment >::
 load( File& file )
 {
-   file >> this->offsets;
+   file.load( &segmentSize );
+   file.load( &size );
+   file.load( &alignedSize );
 }
 
       } // namespace Segments
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index a43ddba82..9bc8d7fb7 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -489,12 +489,16 @@ vectorProduct( const InVector& inVector,
                const RealType& matrixMultiplicator,
                const RealType& inVectorAddition ) const
 {
-   auto inVectorView = inVector.getConstView();
+   const auto inVectorView = inVector.getConstView();
    auto outVectorView = outVector.getView();
-   auto valuesView = this->values.getConstView();
-   auto columnIndexesView = this->columnIndexes.getConstView();
+   const auto valuesView = this->values.getConstView();
+   const auto columnIndexesView = this->columnIndexes.getConstView();
+   const IndexType paddingIndex = this->getPaddingIndex();
    auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset ) -> RealType {
-      return valuesView[ offset ] * inVectorView[ columnIndexesView[ offset ] ];
+      const IndexType column = columnIndexesView[ offset ];
+      if( column == paddingIndex )
+         return 0.0;
+      return valuesView[ offset ] * inVectorView[ column ];
    };
    auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
       sum += value;
diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index 996dd0430..ef1f04371 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -31,6 +31,9 @@ IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} )
 
+   CUDA_ADD_EXECUTABLE( SparseMatrixTest_Ellpack_segments SparseMatrixTest_Ellpack_segments.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack_segments ${GTEST_BOTH_LIBRARIES} )
+
 ELSE(  BUILD_CUDA )
    ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp )
    TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
index 79cdf06cf..c54aab948 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
@@ -26,34 +26,45 @@ protected:
    using EllpackMatrixType = Matrix;
 };
 
+////
+// Row-major format is used for the host system
+template< typename Device, typename Index >
+using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, true, 32 >;
+
+
+////
+// Column-major format is used for GPUs
+template< typename Device, typename Index >
+using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, false, 32 >;
+
 // types for which MatrixTest is instantiated
 using EllpackMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long  >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long  >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long  >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long  >
+    TNL::Matrices::SparseMatrix< int,     RowMajorEllpack, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< long,    RowMajorEllpack, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< float,   RowMajorEllpack, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< double,  RowMajorEllpack, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< int,     RowMajorEllpack, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< long,    RowMajorEllpack, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< float,   RowMajorEllpack, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< double,  RowMajorEllpack, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< int,     RowMajorEllpack, TNL::Devices::Host, long  >,
+    TNL::Matrices::SparseMatrix< long,    RowMajorEllpack, TNL::Devices::Host, long  >,
+    TNL::Matrices::SparseMatrix< float,   RowMajorEllpack, TNL::Devices::Host, long  >,
+    TNL::Matrices::SparseMatrix< double,  RowMajorEllpack, TNL::Devices::Host, long  >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long  >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long  >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long  >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long  >
+   ,TNL::Matrices::SparseMatrix< int,     ColumnMajorEllpack, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< long,    ColumnMajorEllpack, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< float,   ColumnMajorEllpack, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< double,  ColumnMajorEllpack, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< int,     ColumnMajorEllpack, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< long,    ColumnMajorEllpack, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< float,   ColumnMajorEllpack, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< double,  ColumnMajorEllpack, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< int,     ColumnMajorEllpack, TNL::Devices::Cuda, long  >,
+    TNL::Matrices::SparseMatrix< long,    ColumnMajorEllpack, TNL::Devices::Cuda, long  >,
+    TNL::Matrices::SparseMatrix< float,   ColumnMajorEllpack, TNL::Devices::Cuda, long  >,
+    TNL::Matrices::SparseMatrix< double,  ColumnMajorEllpack, TNL::Devices::Cuda, long  >
 #endif
 >;
 
-- 
GitLab


From 2a37be3b4c0d2af5b8d82dd930285cbac8f29af2 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Thu, 5 Dec 2019 20:29:06 +0100
Subject: [PATCH 013/179] Implementing SlicedEllpack segments.

---
 src/TNL/Containers/Segments/Ellpack.h         |   2 +-
 src/TNL/Containers/Segments/Ellpack.hpp       |  18 ++
 src/TNL/Containers/Segments/SlicedEllpack.h   | 102 +++++++
 src/TNL/Containers/Segments/SlicedEllpack.hpp | 270 ++++++++++++++++++
 .../Matrices/SparseMatrixTest_CSR_segments.h  |   2 +-
 .../SparseMatrixTest_Ellpack_segments.h       |   2 +-
 ...parseMatrixTest_SlicedEllpack_segments.cpp |   1 +
 ...SparseMatrixTest_SlicedEllpack_segments.cu |   1 +
 .../SparseMatrixTest_SlicedEllpack_segments.h | 152 ++++++++++
 9 files changed, 547 insertions(+), 3 deletions(-)
 create mode 100644 src/TNL/Containers/Segments/SlicedEllpack.h
 create mode 100644 src/TNL/Containers/Segments/SlicedEllpack.hpp
 create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cpp
 create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cu
 create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h

diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h
index dc1a717b3..d99ffe336 100644
--- a/src/TNL/Containers/Segments/Ellpack.h
+++ b/src/TNL/Containers/Segments/Ellpack.h
@@ -26,7 +26,6 @@ class Ellpack
 
       using DeviceType = Device;
       using IndexType = Index;
-      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
       static constexpr int getAlignment() { return Alignment; }
       static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
 
@@ -44,6 +43,7 @@ class Ellpack
       template< typename SizesHolder = OffsetsHolder >
       void setSizes( const SizesHolder& sizes );
 
+      void setSizes( const IndexType segmentsCount, const IndexType segmentSize );
       /**
        * \brief Number segments.
        */
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index 8a23693ec..833b162eb 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -66,6 +66,24 @@ setSizes( const SizesHolder& sizes )
       this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment();
 }
 
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+   template< typename SizesHolder >
+void
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
+setSizes( const IndexType segmentsCount, const IndexType segmentSize );
+{
+   this->segmentSize = segmentSize;
+   this->size = segmentsCount;
+   if( RowMajorOrder )
+      this->alignedSize = this->size;
+   else
+      this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment();
+}
+
+
 template< typename Device,
           typename Index,
           bool RowMajorOrder,
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h
new file mode 100644
index 000000000..a5ef9d121
--- /dev/null
+++ b/src/TNL/Containers/Segments/SlicedEllpack.h
@@ -0,0 +1,102 @@
+/***************************************************************************
+                          SlicedEllpack.h -  description
+                             -------------------
+    begin                : Dec 4, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          int SliceSize = 32 >
+class SlicedEllpack
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
+      static constexpr int getSliceSize() { return SliceSize; }
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
+
+      SlicedEllpack();
+
+      SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes );
+
+      SlicedEllpack( const SlicedEllpack& segments );
+
+      SlicedEllpack( const SlicedEllpack&& segments );
+
+      /**
+       * \brief Set sizes of particular segments.
+       */
+      template< typename SizesHolder = OffsetsHolder >
+      void setSizes( const SizesHolder& sizes );
+
+      /**
+       * \brief Number segments.
+       */
+      __cuda_callable__
+      IndexType getSize() const;
+
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      __cuda_callable__
+      IndexType getStorageSize() const;
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+   protected:
+
+      IndexType size;
+
+      OffsetHolder sliceOffsets;
+};
+
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
+
+#include <TNL/Containers/Segments/SlicedEllpack.hpp>
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
new file mode 100644
index 000000000..60d2059fe
--- /dev/null
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -0,0 +1,270 @@
+/***************************************************************************
+                          SlicedEllpack.hpp -  description
+                             -------------------
+    begin                : Dec 4, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Containers/Segments/SlicedEllpack.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack()
+   : size( 0 )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack( const SlicedEllpack& slicedEllpack )
+   : size( slicedEllpack.size ), sliceOffsets( slicedEllpack.sliceOffsets )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack( const SlicedEllpack&& slicedEllpack )
+   : size( slicedEllpack.size ), sliceOffsets( slicedEllpack.sliceOffsets )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename SizesHolder >
+void
+SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+setSizes( const SizesHolder& sizes )
+{
+   this->size = sizes.getSize();
+   const IndexType segmentsCount = roundUpDivision( this->size, getSliceSize() );
+   this->segmentOffsets.setSize( segmentsCount + 1 );
+   Ellpack< DeviceType, IndexType, true > ellpack;
+   ellpack.setSizes( segmentsCount, SliceSize );
+   ...
+
+
+
+
+
+   if( RowMajorOrder )
+      this->alignedSize = this->size;
+   else
+      this->alignedSize = roundUpDivision( size, this->getSliceSize() ) * this->getSliceSize();
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+getSize() const
+{
+   return this->size;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+getSegmentSize( const IndexType segmentIdx ) const
+{
+   return this->segmentSize;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+getStorageSize() const
+{
+   return this->alignedSize * this->segmentSize;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
+{
+   if( RowMajorOrder )
+      return segmentIdx * this->segmentSize + localIdx;
+   else
+      return segmentIdx + this->alignedSize * localIdx;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+void
+SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Function, typename... Args >
+void
+SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
+{
+   const auto offsetsView = this->offsets.getView();
+   if( RowMajorOrder )
+   {
+      const IndexType segmentSize = this->segmentSize;
+      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) {
+         const IndexType begin = i * segmentSize;
+         const IndexType end = begin + segmentSize;
+         for( IndexType j = begin; j < end; j++  )
+            if( ! f( i, j, args... ) )
+               break;
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+   else
+   {
+      const IndexType storageSize = this->getStorageSize();
+      const IndexType alignedSize = this->alignedSize;
+      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) {
+         const IndexType begin = i;
+         const IndexType end = storageSize;
+         for( IndexType j = begin; j < end; j += alignedSize )
+            if( ! f( i, j, args... ) )
+               break;
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Function, typename... Args >
+void
+SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+forAll( Function& f, Args... args ) const
+{
+   this->forSegments( 0, this->getSize(), f, args... );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   if( RowMajorOrder )
+   {
+      using RealType = decltype( fetch( IndexType(), IndexType() ) );
+      const IndexType segmentSize = this->segmentSize;
+      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
+         const IndexType begin = i * segmentSize;
+         const IndexType end = begin + segmentSize;
+         RealType aux( zero );
+         for( IndexType j = begin; j < end; j++  )
+            reduction( aux, fetch( i, j, args... ) );
+         keeper( i, aux );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+   else
+   {
+      using RealType = decltype( fetch( IndexType(), IndexType() ) );
+      const IndexType storageSize = this->getStorageSize();
+      const IndexType alignedSize = this->alignedSize;
+      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
+         const IndexType begin = i;
+         const IndexType end = storageSize;
+         RealType aux( zero );
+         for( IndexType j = begin; j < end; j += alignedSize  )
+            reduction( aux, fetch( i, j, args... ) );
+         keeper( i, aux );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+void
+SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+save( File& file ) const
+{
+   file.save( &segmentSize );
+   file.save( &size );
+   file.save( &alignedSize );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+void
+SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+load( File& file )
+{
+   file.load( &segmentSize );
+   file.load( &size );
+   file.load( &alignedSize );
+}
+
+      } // namespace Segments
+   }  // namespace Conatiners
+} // namespace TNL
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
index b53358469..bf4e452fa 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          SparseMatrixTest_CSR.h -  description
+                          SparseMatrixTest_CSR_segments.h -  description
                              -------------------
     begin                : Dec 2, 2019
     copyright            : (C) 2019 by Tomas Oberhuber et al.
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
index c54aab948..edfe0bc28 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          SparseMatrixTest_Ellpack.h -  description
+                          SparseMatrixTest_Ellpack_segments.h -  description
                              -------------------
     begin                : Dec 3, 2019
     copyright            : (C) 2019 by Tomas Oberhuber et al.
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cpp b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cpp
new file mode 100644
index 000000000..a88301100
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cpp
@@ -0,0 +1 @@
+#include "SparseMatrixTest_SlicedEllpack_segments.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cu b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cu
new file mode 100644
index 000000000..a88301100
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cu
@@ -0,0 +1 @@
+#include "SparseMatrixTest_SlicedEllpack_segments.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
new file mode 100644
index 000000000..8d17b8be7
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
@@ -0,0 +1,152 @@
+/***************************************************************************
+                          SparseMatrixTest_SlicedEllpack.h -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Segments/SlicedEllpack.h>
+#include <TNL/Matrices/SparseMatrix.h>
+
+
+#include "SparseMatrixTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+// test fixture for typed tests
+template< typename Matrix >
+class SlicedEllpackMatrixTest : public ::testing::Test
+{
+protected:
+   using SlicedEllpackMatrixType = Matrix;
+};
+
+////
+// Row-major format is used for the host system
+template< typename Device, typename Index >
+using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, true, 32 >;
+
+
+////
+// Column-major format is used for GPUs
+template< typename Device, typename Index >
+using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, false, 32 >;
+
+// types for which MatrixTest is instantiated
+using SlicedEllpackMatrixTypes = ::testing::Types
+<
+    TNL::Matrices::SparseMatrix< int,     RowMajorSlicedEllpack, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< long,    RowMajorSlicedEllpack, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< float,   RowMajorSlicedEllpack, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< double,  RowMajorSlicedEllpack, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< int,     RowMajorSlicedEllpack, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< long,    RowMajorSlicedEllpack, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< float,   RowMajorSlicedEllpack, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< double,  RowMajorSlicedEllpack, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< int,     RowMajorSlicedEllpack, TNL::Devices::Host, long  >,
+    TNL::Matrices::SparseMatrix< long,    RowMajorSlicedEllpack, TNL::Devices::Host, long  >,
+    TNL::Matrices::SparseMatrix< float,   RowMajorSlicedEllpack, TNL::Devices::Host, long  >,
+    TNL::Matrices::SparseMatrix< double,  RowMajorSlicedEllpack, TNL::Devices::Host, long  >
+#ifdef HAVE_CUDA
+   ,TNL::Matrices::SparseMatrix< int,     ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< long,    ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< float,   ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< double,  ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< int,     ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< long,    ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< float,   ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< double,  ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< int,     ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long  >,
+    TNL::Matrices::SparseMatrix< long,    ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long  >,
+    TNL::Matrices::SparseMatrix< float,   ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long  >,
+    TNL::Matrices::SparseMatrix< double,  ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long  >
+#endif
+>;
+
+TYPED_TEST_SUITE( SlicedEllpackMatrixTest, SlicedEllpackMatrixTypes);
+
+TYPED_TEST( SlicedEllpackMatrixTest, setDimensionsTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SetDimensions< SlicedEllpackMatrixType >();
+}
+
+//TYPED_TEST( SlicedEllpackMatrixTest, setCompressedRowLengthsTest )
+//{
+////    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+//
+////    test_SetCompressedRowLengths< SlicedEllpackMatrixType >();
+//
+//    bool testRan = false;
+//    EXPECT_TRUE( testRan );
+//    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+//    std::cout << "      This test is dependent on the input format. \n";
+//    std::cout << "      Almost every format allocates elements per row differently.\n\n";
+//    std::cout << "\n    TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n";
+//}
+
+TYPED_TEST( SlicedEllpackMatrixTest, setLikeTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SetLike< SlicedEllpackMatrixType, SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( SlicedEllpackMatrixTest, resetTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_Reset< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( SlicedEllpackMatrixTest, setElementTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SetElement< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( SlicedEllpackMatrixTest, addElementTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_AddElement< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( SlicedEllpackMatrixTest, setRowTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SetRow< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( SlicedEllpackMatrixTest, vectorProductTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_VectorProduct< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( SlicedEllpackMatrixTest, saveAndLoadTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SaveAndLoad< SlicedEllpackMatrixType >( "test_SparseMatrixTest_SlicedEllpack_segments" );
+}
+
+TYPED_TEST( SlicedEllpackMatrixTest, printTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_Print< SlicedEllpackMatrixType >();
+}
+
+#endif
+
+#include "../main.h"
-- 
GitLab


From 47d413b3623fff9e2b733955bd55481a430d8d93 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Fri, 6 Dec 2019 16:30:30 +0100
Subject: [PATCH 014/179] Added Segments unit tests.

---
 src/TNL/Containers/Segments/CSR.h             | 19 +++-
 src/TNL/Containers/Segments/CSR.hpp           | 28 +++++-
 src/TNL/Containers/Segments/Ellpack.h         | 15 ++-
 src/TNL/Containers/Segments/Ellpack.hpp       | 42 +++++++-
 src/TNL/Matrices/SparseMatrix.hpp             |  2 +-
 src/UnitTests/Containers/CMakeLists.txt       |  2 +
 .../Containers/Segments/CMakeLists.txt        | 52 ++++++++++
 .../Containers/Segments/SegmentsTest.hpp      | 95 +++++++++++++++++++
 .../Containers/Segments/SegmentsTest_CSR.cpp  |  1 +
 .../Containers/Segments/SegmentsTest_CSR.cu   |  1 +
 .../Containers/Segments/SegmentsTest_CSR.h    | 57 +++++++++++
 .../Segments/SegmentsTest_Ellpack.cpp         |  1 +
 .../Segments/SegmentsTest_Ellpack.cu          |  1 +
 .../Segments/SegmentsTest_Ellpack.h           | 65 +++++++++++++
 14 files changed, 364 insertions(+), 17 deletions(-)
 create mode 100644 src/UnitTests/Containers/Segments/CMakeLists.txt
 create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest.hpp
 create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest_CSR.cpp
 create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest_CSR.cu
 create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest_CSR.h
 create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cpp
 create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cu
 create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h

diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h
index 52ca36e22..ecd1de983 100644
--- a/src/TNL/Containers/Segments/CSR.h
+++ b/src/TNL/Containers/Segments/CSR.h
@@ -25,10 +25,11 @@ class CSR
       using DeviceType = Device;
       using IndexType = Index;
       using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
+      using SegmentsSizes = OffsetsHolder;
 
       CSR();
 
-      CSR( const Vector< IndexType, DeviceType, IndexType >& sizes );
+      CSR( const SegmentsSizes& sizes );
 
       CSR( const CSR& segments );
 
@@ -38,17 +39,29 @@ class CSR
        * \brief Set sizes of particular segments.
        */
       template< typename SizesHolder = OffsetsHolder >
-      void setSizes( const SizesHolder& sizes );
+      void setSegmentsSizes( const SizesHolder& sizes );
 
       /**
        * \brief Number segments.
        */
       __cuda_callable__
-      IndexType getSize() const;
+      IndexType getSegmentsCount() const;
 
+      /***
+       * \brief Returns size of the segment number \r segmentIdx
+       */
       __cuda_callable__
       IndexType getSegmentSize( const IndexType segmentIdx ) const;
 
+      /***
+       * \brief Returns number of elements managed by all segments.
+       */
+      __cuda_callable__
+      IndexType getSize() const;
+
+      /***
+       * \brief Returns number of elements that needs to be allocated.
+       */
       __cuda_callable__
       IndexType getStorageSize() const;
 
diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index b40524e5e..677cd1b00 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -26,6 +26,14 @@ CSR()
 {
 }
 
+template< typename Device,
+          typename Index >
+CSR< Device, Index >::
+CSR( const SegmentsSizes& segmentsSizes )
+{
+   this->setSegmentsSizes( segmentsSizes );
+}
+
 template< typename Device,
           typename Index >
 CSR< Device, Index >::
@@ -46,7 +54,7 @@ template< typename Device,
    template< typename SizesHolder >
 void
 CSR< Device, Index >::
-setSizes( const SizesHolder& sizes )
+setSegmentsSizes( const SizesHolder& sizes )
 {
    this->offsets.setSize( sizes.getSize() + 1 );
    auto view = this->offsets.getView( 0, sizes.getSize() );
@@ -60,7 +68,7 @@ template< typename Device,
 __cuda_callable__
 Index
 CSR< Device, Index >::
-getSize() const
+getSegmentsCount() const
 {
    return this->offsets.getSize() - 1;
 }
@@ -83,6 +91,16 @@ getSegmentSize( const IndexType segmentIdx ) const
    return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ];
 }
 
+template< typename Device,
+          typename Index >
+__cuda_callable__
+Index
+CSR< Device, Index >::
+getSize() const
+{
+   return this->getStorageSize();
+}
+
 template< typename Device,
           typename Index >
 __cuda_callable__
@@ -93,12 +111,12 @@ getStorageSize() const
    if( ! std::is_same< DeviceType, Devices::Host >::value )
    {
 #ifdef __CUDA_ARCH__
-      return offsets[ this->getSize() ];
+      return offsets[ this->getSegmentsCount() ];
 #else
-      return offsets.getElement( this->getSize() );
+      return offsets.getElement( this->getSegmentsCount() );
 #endif
    }
-   return offsets[ this->getSize() ];
+   return offsets[ this->getSegmentsCount() ];
 }
 
 template< typename Device,
diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h
index d99ffe336..b08ad0f04 100644
--- a/src/TNL/Containers/Segments/Ellpack.h
+++ b/src/TNL/Containers/Segments/Ellpack.h
@@ -28,10 +28,14 @@ class Ellpack
       using IndexType = Index;
       static constexpr int getAlignment() { return Alignment; }
       static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
+      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
+      using SegmentsSizes = OffsetsHolder;
 
       Ellpack();
 
-      Ellpack( const Vector< IndexType, DeviceType, IndexType >& sizes );
+      Ellpack( const SegmentsSizes& sizes );
+
+      Ellpack( const IndexType segmentsCount, const IndexType segmentSize );
 
       Ellpack( const Ellpack& segments );
 
@@ -41,18 +45,21 @@ class Ellpack
        * \brief Set sizes of particular segments.
        */
       template< typename SizesHolder = OffsetsHolder >
-      void setSizes( const SizesHolder& sizes );
+      void setSegmentsSizes( const SizesHolder& sizes );
 
-      void setSizes( const IndexType segmentsCount, const IndexType segmentSize );
+      void setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize );
       /**
        * \brief Number segments.
        */
       __cuda_callable__
-      IndexType getSize() const;
+      IndexType getSegmentsCount() const;
 
       __cuda_callable__
       IndexType getSegmentSize( const IndexType segmentIdx ) const;
 
+      __cuda_callable__
+      IndexType getSize() const;
+
       __cuda_callable__
       IndexType getStorageSize() const;
 
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index 833b162eb..e855d0d9a 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -29,6 +29,28 @@ Ellpack()
 {
 }
 
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack( const SegmentsSizes& segmentsSizes )
+   : segmentSize( 0 ), size( 0 ), alignedSize( 0 )
+{
+   this->setSegmentsSizes( segmentsSizes );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack( const IndexType segmentsCount, const IndexType segmentSize )
+   : segmentSize( 0 ), size( 0 ), alignedSize( 0 )
+{
+   this->setSegmentsSizes( segmentsCount, segmentSize );
+}
+
 template< typename Device,
           typename Index,
           bool RowMajorOrder,
@@ -56,7 +78,7 @@ template< typename Device,
    template< typename SizesHolder >
 void
 Ellpack< Device, Index, RowMajorOrder, Alignment >::
-setSizes( const SizesHolder& sizes )
+setSegmentsSizes( const SizesHolder& sizes )
 {
    this->segmentSize = max( sizes );
    this->size = sizes.getSize();
@@ -70,10 +92,9 @@ template< typename Device,
           typename Index,
           bool RowMajorOrder,
           int Alignment >
-   template< typename SizesHolder >
 void
 Ellpack< Device, Index, RowMajorOrder, Alignment >::
-setSizes( const IndexType segmentsCount, const IndexType segmentSize );
+setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize )
 {
    this->segmentSize = segmentSize;
    this->size = segmentsCount;
@@ -91,7 +112,7 @@ template< typename Device,
 __cuda_callable__
 Index
 Ellpack< Device, Index, RowMajorOrder, Alignment >::
-getSize() const
+getSegmentsCount() const
 {
    return this->size;
 }
@@ -108,6 +129,19 @@ getSegmentSize( const IndexType segmentIdx ) const
    return this->segmentSize;
 }
 
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+Index
+Ellpack< Device, Index, RowMajorOrder, Alignment >::
+getSize() const
+{
+   return this->size * this->segmentSize;
+}
+
+
 template< typename Device,
           typename Index,
           bool RowMajorOrder,
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 9bc8d7fb7..37f59c058 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -107,7 +107,7 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
 {
    TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "Number of matrix rows does not fit with rowLengths vector size." );
-   this->segments.setSizes( rowLengths );
+   this->segments.setSegmentsSizes( rowLengths );
    this->values.setSize( this->segments.getStorageSize() );
    this->values = ( RealType ) 0;
    this->columnIndexes.setSize( this->segments.getStorageSize() );
diff --git a/src/UnitTests/Containers/CMakeLists.txt b/src/UnitTests/Containers/CMakeLists.txt
index 9f27aaa86..227a86551 100644
--- a/src/UnitTests/Containers/CMakeLists.txt
+++ b/src/UnitTests/Containers/CMakeLists.txt
@@ -1,3 +1,5 @@
+ADD_SUBDIRECTORY( Segments )
+
 ADD_EXECUTABLE( ArrayTest ArrayTest.cpp )
 TARGET_COMPILE_OPTIONS( ArrayTest PRIVATE ${CXX_TESTS_FLAGS} )
 TARGET_LINK_LIBRARIES( ArrayTest ${GTEST_BOTH_LIBRARIES} )
diff --git a/src/UnitTests/Containers/Segments/CMakeLists.txt b/src/UnitTests/Containers/Segments/CMakeLists.txt
new file mode 100644
index 000000000..6304a4998
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/CMakeLists.txt
@@ -0,0 +1,52 @@
+IF( BUILD_CUDA )
+#   CUDA_ADD_EXECUTABLE( SegmentsTest_AdEllpack SegmentsTest_AdEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( SegmentsTest_AdEllpack ${GTEST_BOTH_LIBRARIES} )
+
+#   CUDA_ADD_EXECUTABLE( SegmentsTest_BiEllpack SegmentsTest_BiEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( SegmentsTest_BiEllpack ${GTEST_BOTH_LIBRARIES} )
+
+#   CUDA_ADD_EXECUTABLE( SegmentsTest_ChunkedEllpack SegmentsTest_ChunkedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( SegmentsTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( SegmentsTest_CSR SegmentsTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SegmentsTest_CSR ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( SegmentsTest_Ellpack SegmentsTest_Ellpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SegmentsTest_Ellpack ${GTEST_BOTH_LIBRARIES} )
+
+#   CUDA_ADD_EXECUTABLE( SegmentsTest_SlicedEllpack SegmentsTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( SegmentsTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
+
+ELSE(  BUILD_CUDA )
+#   ADD_EXECUTABLE( SegmentsTest_AdEllpack SegmentsTest_AdEllpack.cpp )
+#   TARGET_COMPILE_OPTIONS( SegmentsTest_AdEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( SegmentsTest_AdEllpack ${GTEST_BOTH_LIBRARIES} )
+
+#   ADD_EXECUTABLE( SegmentsTest_BiEllpack SegmentsTest_BiEllpack.cpp )
+#   TARGET_COMPILE_OPTIONS( SegmentsTest_BiEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( SegmentsTest_BiEllpack ${GTEST_BOTH_LIBRARIES} )
+
+#   ADD_EXECUTABLE( SegmentsTest_ChunkedEllpack SegmentsTest_ChunkedEllpack.cpp )
+#   TARGET_COMPILE_OPTIONS( SegmentsTest_ChunkedEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( SegmentsTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( SegmentsTest_CSR SegmentsTest_CSR.cpp )
+   TARGET_COMPILE_OPTIONS( SegmentsTest_CSR PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SegmentsTest_CSR ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( SegmentsTest_Ellpack SegmentsTest_Ellpack.cpp )
+   TARGET_COMPILE_OPTIONS( SegmentsTest_Ellpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SegmentsTest_Ellpack ${GTEST_BOTH_LIBRARIES} )
+
+#   ADD_EXECUTABLE( SegmentsTest_SlicedEllpack SegmentsTest_SlicedEllpack.cpp )
+#   TARGET_COMPILE_OPTIONS( SegmentsTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( SegmentsTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
+ENDIF( BUILD_CUDA )
+
+
+#ADD_TEST( SegmentsTest_BiEllpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_BiEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+#ADD_TEST( SegmentsTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( SegmentsTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_CSR${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( SegmentsTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} )
+#ADD_TEST( SegmentsTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
new file mode 100644
index 000000000..9aa7fb94f
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
@@ -0,0 +1,95 @@
+/***************************************************************************
+                          SegmentsTest.hpp -  description
+                             -------------------
+    begin                : Dec 6, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Math.h>
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+template< typename Segments >
+void test_SetSegmentsSizes_EqualSizes()
+{
+   using DeviceType = typename Segments::DeviceType;
+   using IndexType = typename Segments::IndexType;
+
+   const IndexType segmentsCount = 20;
+   const IndexType segmentSize = 5;
+   TNL::Containers::Vector< IndexType, DeviceType, IndexType > segmentsSizes( segmentsCount );
+   segmentsSizes = 5;//segmentSize;
+
+   Segments segments( segmentsSizes );
+
+   EXPECT_EQ( segments.getSegmentsCount(), segmentsCount );
+   EXPECT_EQ( segments.getSize(), segmentsCount * segmentSize );
+   EXPECT_LE( segments.getSize(), segments.getStorageSize() );
+
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( segments.getSegmentSize( i ), segmentSize );
+
+   Segments segments2( segments );
+   EXPECT_EQ( segments2.getSegmentsCount(), segmentsCount );
+   EXPECT_EQ( segments2.getSize(), segmentsCount * segmentSize );
+   EXPECT_LE( segments2.getSize(), segments2.getStorageSize() );
+
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( segments2.getSegmentSize( i ), segmentSize );
+
+   Segments segments3;
+   segments3.setSegmentsSizes( segmentsSizes );
+
+   EXPECT_EQ( segments3.getSegmentsCount(), segmentsCount );
+   EXPECT_EQ( segments3.getSize(), segmentsCount * segmentSize );
+   EXPECT_LE( segments3.getSize(), segments3.getStorageSize() );
+
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( segments3.getSegmentSize( i ), segmentSize );
+}
+
+template< typename Segments >
+void test_SetSegmentsSizes_EqualSizes_EllpackOnly()
+{
+   using DeviceType = typename Segments::DeviceType;
+   using IndexType = typename Segments::IndexType;
+
+   const IndexType segmentsCount = 20;
+   const IndexType segmentSize = 5;
+
+   Segments segments( segmentsCount, segmentSize );
+
+   EXPECT_EQ( segments.getSegmentsCount(), segmentsCount );
+   EXPECT_EQ( segments.getSize(), segmentsCount * segmentSize );
+   EXPECT_LE( segments.getSize(), segments.getStorageSize() );
+
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( segments.getSegmentSize( i ), segmentSize );
+
+   Segments segments2( segments );
+   EXPECT_EQ( segments2.getSegmentsCount(), segmentsCount );
+   EXPECT_EQ( segments2.getSize(), segmentsCount * segmentSize );
+   EXPECT_LE( segments2.getSize(), segments2.getStorageSize() );
+
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( segments2.getSegmentSize( i ), segmentSize );
+
+   Segments segments3;
+   segments3.setSegmentsSizes( segmentsCount, segmentSize );
+
+   EXPECT_EQ( segments3.getSegmentsCount(), segmentsCount );
+   EXPECT_EQ( segments3.getSize(), segmentsCount * segmentSize );
+   EXPECT_LE( segments3.getSize(), segments3.getStorageSize() );
+
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( segments3.getSegmentSize( i ), segmentSize );
+}
+
+#endif
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cpp b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cpp
new file mode 100644
index 000000000..02edac332
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cpp
@@ -0,0 +1 @@
+#include "SegmentsTest_CSR.h"
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cu b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cu
new file mode 100644
index 000000000..02edac332
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cu
@@ -0,0 +1 @@
+#include "SegmentsTest_CSR.h"
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h
new file mode 100644
index 000000000..e92b7c738
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h
@@ -0,0 +1,57 @@
+/***************************************************************************
+                          SegmentsTest_CSR.h -  description
+                             -------------------
+    begin                : Nov 2, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Segments/CSR.h>
+
+#include "SegmentsTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+// test fixture for typed tests
+template< typename Segments >
+class CSRSegmentsTest : public ::testing::Test
+{
+protected:
+   using CSRSegmentsType = Segments;
+};
+
+// types for which MatrixTest is instantiated
+using CSRSegmentsTypes = ::testing::Types
+<
+    TNL::Containers::Segments::CSR< TNL::Devices::Host, int    >,
+    TNL::Containers::Segments::CSR< TNL::Devices::Host, long   >,
+    TNL::Containers::Segments::CSR< TNL::Devices::Host, int    >,
+    TNL::Containers::Segments::CSR< TNL::Devices::Host, long   >,
+    TNL::Containers::Segments::CSR< TNL::Devices::Host, int    >,
+    TNL::Containers::Segments::CSR< TNL::Devices::Host, long   >
+#ifdef HAVE_CUDA
+   ,TNL::Containers::Segments::CSR< TNL::Devices::Cuda, int    >,
+    TNL::Containers::Segments::CSR< TNL::Devices::Cuda, long   >,
+    TNL::Containers::Segments::CSR< TNL::Devices::Cuda, int    >,
+    TNL::Containers::Segments::CSR< TNL::Devices::Cuda, long   >,
+    TNL::Containers::Segments::CSR< TNL::Devices::Cuda, int    >,
+    TNL::Containers::Segments::CSR< TNL::Devices::Cuda, long   >,
+#endif
+>;
+
+TYPED_TEST_SUITE( CSRSegmentsTest, CSRSegmentsTypes );
+
+TYPED_TEST( CSRSegmentsTest, setSegmentsSizes_EqualSizes )
+{
+    using CSRSegmentsType = typename TestFixture::CSRSegmentsType;
+
+    test_SetSegmentsSizes_EqualSizes< CSRSegmentsType >();
+}
+
+#endif
+
+#include "../../main.h"
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cpp b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cpp
new file mode 100644
index 000000000..120a25103
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cpp
@@ -0,0 +1 @@
+#include "SegmentsTest_Ellpack.h"
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cu b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cu
new file mode 100644
index 000000000..120a25103
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cu
@@ -0,0 +1 @@
+#include "SegmentsTest_Ellpack.h"
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h
new file mode 100644
index 000000000..d484fd27d
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h
@@ -0,0 +1,65 @@
+/***************************************************************************
+                          SegmentsTest_Ellpack.h -  description
+                             -------------------
+    begin                : Dec 6, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Segments/Ellpack.h>
+
+#include "SegmentsTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+// test fixture for typed tests
+template< typename Segments >
+class EllpackSegmentsTest : public ::testing::Test
+{
+protected:
+   using EllpackSegmentsType = Segments;
+};
+
+// types for which MatrixTest is instantiated
+using EllpackSegmentsTypes = ::testing::Types
+<
+    TNL::Containers::Segments::Ellpack< TNL::Devices::Host, int    >,
+    TNL::Containers::Segments::Ellpack< TNL::Devices::Host, long   >,
+    TNL::Containers::Segments::Ellpack< TNL::Devices::Host, int    >,
+    TNL::Containers::Segments::Ellpack< TNL::Devices::Host, long   >,
+    TNL::Containers::Segments::Ellpack< TNL::Devices::Host, int    >,
+    TNL::Containers::Segments::Ellpack< TNL::Devices::Host, long   >
+#ifdef HAVE_CUDA
+   ,TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, int    >,
+    TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, long   >,
+    TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, int    >,
+    TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, long   >,
+    TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, int    >,
+    TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, long   >,
+#endif
+>;
+
+TYPED_TEST_SUITE( EllpackSegmentsTest, EllpackSegmentsTypes );
+
+TYPED_TEST( EllpackSegmentsTest, setSegmentsSizes_EqualSizes )
+{
+    using EllpackSegmentsType = typename TestFixture::EllpackSegmentsType;
+
+    test_SetSegmentsSizes_EqualSizes< EllpackSegmentsType >();
+}
+
+TYPED_TEST( EllpackSegmentsTest, setSegmentsSizes_EqualSizes_EllpackOnly )
+{
+    using EllpackSegmentsType = typename TestFixture::EllpackSegmentsType;
+
+    test_SetSegmentsSizes_EqualSizes_EllpackOnly< EllpackSegmentsType >();
+}
+
+
+#endif
+
+#include "../../main.h"
-- 
GitLab


From 8bdceeae02a439a915709675a801b1e60bc6f887 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Fri, 6 Dec 2019 23:13:52 +0100
Subject: [PATCH 015/179] Implementing unit tests for Segments.

---
 .../Containers/Segments/SegmentsTest.hpp      | 39 ++++++++++++++++++-
 .../Segments/SegmentsTest_Ellpack.h           |  6 +++
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
index 9aa7fb94f..19fff24fa 100644
--- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp
+++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
@@ -25,7 +25,7 @@ void test_SetSegmentsSizes_EqualSizes()
    const IndexType segmentsCount = 20;
    const IndexType segmentSize = 5;
    TNL::Containers::Vector< IndexType, DeviceType, IndexType > segmentsSizes( segmentsCount );
-   segmentsSizes = 5;//segmentSize;
+   segmentsSizes = segmentSize;
 
    Segments segments( segmentsSizes );
 
@@ -92,4 +92,41 @@ void test_SetSegmentsSizes_EqualSizes_EllpackOnly()
       EXPECT_EQ( segments3.getSegmentSize( i ), segmentSize );
 }
 
+template< typename Segments >
+void test_GetMaxInSegments()
+{
+   using DeviceType = typename Segments::DeviceType;
+   using IndexType = typename Segments::IndexType;
+
+   const IndexType segmentsCount = 20;
+   const IndexType segmentSize = 5;
+   const IndexType size = segmentsCount * segmentSize;
+
+   Segments segments( segmentsCount, segmentSize );
+   TNL::Containers::Vector< IndexType, DeviceType, IndexType > segmentsSizes( segmentsCount );
+   segmentsSizes = segmentSize;
+
+   Segments segments( segmentsSizes );
+
+   TNL::Containers::Vector< IndexType, DeviceType, IndexType > v( size );
+
+   for( IndexType i = 0; i < size; i++ )
+      v.setElement( i, i );
+
+   TNL::Containers::Vector< IndexType, DeviceType, IndexType >result( segmentsCount );
+
+   const auto v_view = v.getConstView();
+   auto result_view = result.getView();
+   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> IndexType {
+      return v_view[ i ];
+   }
+   auto reduce = [] __cuda_callable__ ( IndexType& a, const IndexType b ) {
+      a = TNL::max( a, b );
+   }
+   auto keep = [=] __cuda_callable__ ( IndexType& i, const IndexType a ) mutable {
+      result_view[ i ] = a;
+   }
+   segments.allReduction( fetch, reduction, keep, std::numeric_limits< ResultType >::min() );
+}
+
 #endif
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h
index d484fd27d..510fa8738 100644
--- a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h
@@ -59,6 +59,12 @@ TYPED_TEST( EllpackSegmentsTest, setSegmentsSizes_EqualSizes_EllpackOnly )
     test_SetSegmentsSizes_EqualSizes_EllpackOnly< EllpackSegmentsType >();
 }
 
+TYPED_TEST( EllpackSegmentsTest, getMaxInSegments )
+{
+    using EllpackSegmentsType = typename TestFixture::EllpackSegmentsType;
+
+    test_GetMaxInSegments< EllpackSegmentsType >();
+}
 
 #endif
 
-- 
GitLab


From c8c5fc096930d387d0ce3a8b654a3db19f363d83 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sat, 7 Dec 2019 11:58:37 +0100
Subject: [PATCH 016/179] Added test for allReduction in Segments.

---
 src/TNL/Containers/Segments/CSR.hpp           |  2 +-
 src/TNL/Containers/Segments/Ellpack.hpp       |  2 +-
 .../Containers/Segments/SegmentsTest.hpp      | 29 +++++++++++--------
 .../Containers/Segments/SegmentsTest_CSR.h    |  7 +++++
 .../Segments/SegmentsTest_Ellpack.h           |  4 +--
 5 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index 677cd1b00..486149e04 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -201,7 +201,7 @@ void
 CSR< Device, Index >::
 allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... );
+   this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
 }
 
 template< typename Device,
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index e855d0d9a..034b0820e 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -278,7 +278,7 @@ void
 Ellpack< Device, Index, RowMajorOrder, Alignment >::
 allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... );
+   this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
 }
 
 template< typename Device,
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
index 19fff24fa..484b92eb4 100644
--- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp
+++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
@@ -93,7 +93,7 @@ void test_SetSegmentsSizes_EqualSizes_EllpackOnly()
 }
 
 template< typename Segments >
-void test_GetMaxInSegments()
+void test_AllReduction_MaximumInSegments()
 {
    using DeviceType = typename Segments::DeviceType;
    using IndexType = typename Segments::IndexType;
@@ -102,31 +102,36 @@ void test_GetMaxInSegments()
    const IndexType segmentSize = 5;
    const IndexType size = segmentsCount * segmentSize;
 
-   Segments segments( segmentsCount, segmentSize );
    TNL::Containers::Vector< IndexType, DeviceType, IndexType > segmentsSizes( segmentsCount );
    segmentsSizes = segmentSize;
 
    Segments segments( segmentsSizes );
 
-   TNL::Containers::Vector< IndexType, DeviceType, IndexType > v( size );
+   TNL::Containers::Vector< IndexType, DeviceType, IndexType > v( segments.getStorageSize() );
 
-   for( IndexType i = 0; i < size; i++ )
-      v.setElement( i, i );
+   IndexType k( 1 );
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      for( IndexType j = 0; j < segmentSize; j++ )
+         v.setElement( segments.getGlobalIndex( i, j ), k++ );
 
    TNL::Containers::Vector< IndexType, DeviceType, IndexType >result( segmentsCount );
 
    const auto v_view = v.getConstView();
    auto result_view = result.getView();
-   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> IndexType {
-      return v_view[ i ];
-   }
+   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx ) -> IndexType {
+      return v_view[ globalIdx ];
+   };
    auto reduce = [] __cuda_callable__ ( IndexType& a, const IndexType b ) {
       a = TNL::max( a, b );
-   }
-   auto keep = [=] __cuda_callable__ ( IndexType& i, const IndexType a ) mutable {
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType i, const IndexType a ) mutable {
       result_view[ i ] = a;
-   }
-   segments.allReduction( fetch, reduction, keep, std::numeric_limits< ResultType >::min() );
+   };
+   segments.allReduction( fetch, reduce, keep, std::numeric_limits< IndexType >::min() );
+
+   std::cerr << result << std::endl;
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( result.getElement( i ), ( i + 1 ) * segmentSize );
 }
 
 #endif
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h
index e92b7c738..81d4e9ff3 100644
--- a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h
@@ -52,6 +52,13 @@ TYPED_TEST( CSRSegmentsTest, setSegmentsSizes_EqualSizes )
     test_SetSegmentsSizes_EqualSizes< CSRSegmentsType >();
 }
 
+TYPED_TEST( CSRSegmentsTest, allReduction_MaximumInSegments )
+{
+    using CSRSegmentsType = typename TestFixture::CSRSegmentsType;
+
+    test_AllReduction_MaximumInSegments< CSRSegmentsType >();
+}
+
 #endif
 
 #include "../../main.h"
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h
index 510fa8738..7b5e90b23 100644
--- a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h
@@ -59,11 +59,11 @@ TYPED_TEST( EllpackSegmentsTest, setSegmentsSizes_EqualSizes_EllpackOnly )
     test_SetSegmentsSizes_EqualSizes_EllpackOnly< EllpackSegmentsType >();
 }
 
-TYPED_TEST( EllpackSegmentsTest, getMaxInSegments )
+TYPED_TEST( EllpackSegmentsTest, allReduction_MaximumInSegments )
 {
     using EllpackSegmentsType = typename TestFixture::EllpackSegmentsType;
 
-    test_GetMaxInSegments< EllpackSegmentsType >();
+    test_AllReduction_MaximumInSegments< EllpackSegmentsType >();
 }
 
 #endif
-- 
GitLab


From bfb8e0c0a4794f48e95e6eff29f572d0f952228a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sat, 7 Dec 2019 12:10:17 +0100
Subject: [PATCH 017/179] Implementing SlicedEllpack segments.

---
 src/TNL/Containers/Segments/SlicedEllpack.hpp | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index 60d2059fe..c91a13473 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -64,9 +64,22 @@ setSizes( const SizesHolder& sizes )
    this->segmentOffsets.setSize( segmentsCount + 1 );
    Ellpack< DeviceType, IndexType, true > ellpack;
    ellpack.setSizes( segmentsCount, SliceSize );
-   ...
 
+   const IndexType _size = this->getSize();
+   const auto sizes_view = sizes.getConstView();
+   auto offsets_view = this->segmentOffsets().getView();
+   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx ) -> IndexType {
+      if( globalIdx < size )
+         return sizes_view[ globalIdx ];
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType i ) {
+      aux = TNL::max( aux, i );
+   };
+   auto keep = [=] __cuda_callable__ ( IndexType i, IndexType res ) {
+      offsets_view[ i ] = res;
+   }
 
+   std::cerr << offsets_view << std::endl;
 
 
-- 
GitLab


From 01f367ca5e64c90d8c1db876e16bad37b3ef6e2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 9 Dec 2019 20:07:29 +0100
Subject: [PATCH 018/179] Implementing SlicedEllpack segments.

---
 src/TNL/Containers/Segments/CSR.hpp           |   2 +-
 src/TNL/Containers/Segments/Ellpack.hpp       |   5 +-
 src/TNL/Containers/Segments/SlicedEllpack.h   |  14 +-
 src/TNL/Containers/Segments/SlicedEllpack.hpp | 191 ++++++++++++------
 4 files changed, 141 insertions(+), 71 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index 486149e04..ef7431038 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -153,7 +153,7 @@ void
 CSR< Device, Index >::
 forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 {
-   const auto offsetsView = this->offsets.getView();
+   const auto offsetsView = this->offsets.getConstView();
    auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
       const IndexType begin = offsetsView[ i ];
       const IndexType end = offsetsView[ i + 1 ];
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index 034b0820e..d3d90be5e 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -189,11 +189,10 @@ void
 Ellpack< Device, Index, RowMajorOrder, Alignment >::
 forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 {
-   const auto offsetsView = this->offsets.getView();
    if( RowMajorOrder )
    {
       const IndexType segmentSize = this->segmentSize;
-      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) {
+      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
          const IndexType begin = i * segmentSize;
          const IndexType end = begin + segmentSize;
          for( IndexType j = begin; j < end; j++  )
@@ -206,7 +205,7 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
    {
       const IndexType storageSize = this->getStorageSize();
       const IndexType alignedSize = this->alignedSize;
-      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) {
+      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
          const IndexType begin = i;
          const IndexType end = storageSize;
          for( IndexType j = begin; j < end; j += alignedSize )
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h
index a5ef9d121..ecc2c8c7e 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.h
+++ b/src/TNL/Containers/Segments/SlicedEllpack.h
@@ -42,7 +42,13 @@ class SlicedEllpack
        * \brief Set sizes of particular segments.
        */
       template< typename SizesHolder = OffsetsHolder >
-      void setSizes( const SizesHolder& sizes );
+      void setSegmentsSizes( const SizesHolder& sizes );
+
+      __cuda_callable__
+      IndexType getSegmentsCount() const;
+
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
 
       /**
        * \brief Number segments.
@@ -50,8 +56,6 @@ class SlicedEllpack
       __cuda_callable__
       IndexType getSize() const;
 
-      __cuda_callable__
-      IndexType getSegmentSize( const IndexType segmentIdx ) const;
 
       __cuda_callable__
       IndexType getStorageSize() const;
@@ -90,9 +94,9 @@ class SlicedEllpack
 
    protected:
 
-      IndexType size;
+      IndexType size, alignedSize, segmentsCount;
 
-      OffsetHolder sliceOffsets;
+      OffsetsHolder sliceOffsets, sliceSegmentSizes;
 };
 
       } // namespace Segements
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index c91a13473..e23ee5f15 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -26,17 +26,30 @@ template< typename Device,
           int SliceSize >
 SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
 SlicedEllpack()
-   : size( 0 )
+   : size( 0 ), alignedSize( 0 ), segmentsCount( 0 )
 {
 }
 
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes )
+   : size( 0 ), alignedSize( 0 ), segmentsCount( 0 )
+{
+   this->setSegmentsSizes( sizes );
+}
+
 template< typename Device,
           typename Index,
           bool RowMajorOrder,
           int SliceSize >
 SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
 SlicedEllpack( const SlicedEllpack& slicedEllpack )
-   : size( slicedEllpack.size ), sliceOffsets( slicedEllpack.sliceOffsets )
+   : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ),
+     segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ),
+     sliceSegmentSizes( slicedEllpack.sliceSegmentSizes )
 {
 }
 
@@ -46,7 +59,9 @@ template< typename Device,
           int SliceSize >
 SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
 SlicedEllpack( const SlicedEllpack&& slicedEllpack )
-   : size( slicedEllpack.size ), sliceOffsets( slicedEllpack.sliceOffsets )
+   : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ),
+     segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ),
+     sliceSegmentSizes( slicedEllpack.sliceSegmentSizes )
 {
 }
 
@@ -57,36 +72,36 @@ template< typename Device,
    template< typename SizesHolder >
 void
 SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
-setSizes( const SizesHolder& sizes )
+setSegmentsSizes( const SizesHolder& sizes )
 {
-   this->size = sizes.getSize();
-   const IndexType segmentsCount = roundUpDivision( this->size, getSliceSize() );
-   this->segmentOffsets.setSize( segmentsCount + 1 );
+   this->segmentsCount = sizes.getSize();
+   const IndexType slicesCount = roundUpDivision( this->segmentsCount, getSliceSize() );
+   this->sliceOffsets.setSize( slicesCount + 1 );
+   this->sliceOffsets = 0;
+   this->sliceSegmentSizes.setSize( slicesCount );
    Ellpack< DeviceType, IndexType, true > ellpack;
-   ellpack.setSizes( segmentsCount, SliceSize );
+   ellpack.setSegmentsSizes( slicesCount, SliceSize );
 
-   const IndexType _size = this->getSize();
+   const IndexType _size = sizes.getSize();
    const auto sizes_view = sizes.getConstView();
-   auto offsets_view = this->segmentOffsets().getView();
+   auto slices_view = this->sliceOffsets.getView();
+   auto slice_segment_size_view = this->sliceSegmentSizes.getView();
    auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx ) -> IndexType {
-      if( globalIdx < size )
+      if( globalIdx < _size )
          return sizes_view[ globalIdx ];
+      return 0;
    };
    auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType i ) {
       aux = TNL::max( aux, i );
    };
-   auto keep = [=] __cuda_callable__ ( IndexType i, IndexType res ) {
-      offsets_view[ i ] = res;
-   }
-
-   std::cerr << offsets_view << std::endl;
-
-
-
-   if( RowMajorOrder )
-      this->alignedSize = this->size;
-   else
-      this->alignedSize = roundUpDivision( size, this->getSliceSize() ) * this->getSliceSize();
+   auto keep = [=] __cuda_callable__ ( IndexType i, IndexType res ) mutable {
+      slices_view[ i ] = res * SliceSize;
+      slice_segment_size_view[ i ] = res;
+   };
+   ellpack.allReduction( fetch, reduce, keep, std::numeric_limits< IndexType >::min() );
+   this->sliceOffsets.template scan< Algorithms::ScanType::Exclusive >();
+   this->size = sum( sizes );
+   this->alignedSize = this->sliceOffsets.getElement( slicesCount );
 }
 
 template< typename Device,
@@ -96,9 +111,9 @@ template< typename Device,
 __cuda_callable__
 Index
 SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
-getSize() const
+getSegmentsCount() const
 {
-   return this->size;
+   return this->segmentsCount;
 }
 
 template< typename Device,
@@ -110,7 +125,29 @@ Index
 SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
 getSegmentSize( const IndexType segmentIdx ) const
 {
-   return this->segmentSize;
+   const Index sliceIdx = segmentIdx / SliceSize;
+   if( std::is_same< DeviceType, Devices::Host >::value )
+      return this->sliceSegmentSizes[ sliceIdx ];
+   else
+   {
+#ifdef __CUDA_ARCH__
+   return this->sliceSegmentSizes[ sliceIdx ];
+#else
+   return this->sliceSegmentSizes.getElement( sliceIdx );
+#endif
+   }
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+getSize() const
+{
+   return this->size;
 }
 
 template< typename Device,
@@ -122,7 +159,7 @@ Index
 SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
 getStorageSize() const
 {
-   return this->alignedSize * this->segmentSize;
+   return this->alignedSize;
 }
 
 template< typename Device,
@@ -134,10 +171,28 @@ Index
 SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
 getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
 {
+   const IndexType sliceIdx = segmentIdx / SliceSize;
+   const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+   IndexType sliceOffset, segmentSize;
+   if( std::is_same< DeviceType, Devices::Host >::value )
+   {
+      sliceOffset = this->sliceOffsets[ sliceIdx ];
+      segmentSize = this->sliceSegmentSizes[ sliceIdx ];
+   }
+   else
+   {
+#ifdef __CUDA__ARCH__
+      sliceOffset = this->sliceOffsets[ sliceIdx ];
+      segmentSize = this->sliceSegmentSizes[ sliceIdx ];
+#else
+      sliceOffset = this->sliceOffsets.getElement( sliceIdx );
+      segmentSize = this->sliceSegmentSizes.getElement( sliceIdx );
+#endif
+   }
    if( RowMajorOrder )
-      return segmentIdx * this->segmentSize + localIdx;
+      return sliceOffset + segmentInSliceIdx * segmentSize + localIdx;
    else
-      return segmentIdx + this->alignedSize * localIdx;
+      return sliceOffset + segmentInSliceIdx + SliceSize * localIdx;
 }
 
 template< typename Device,
@@ -160,28 +215,32 @@ void
 SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
 forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 {
-   const auto offsetsView = this->offsets.getView();
+   const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
+   const auto sliceOffsets_view = this->sliceOffsets.getConstView();
    if( RowMajorOrder )
    {
-      const IndexType segmentSize = this->segmentSize;
-      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) {
-         const IndexType begin = i * segmentSize;
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) {
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
          const IndexType end = begin + segmentSize;
-         for( IndexType j = begin; j < end; j++  )
-            if( ! f( i, j, args... ) )
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx++  )
+            if( ! f( segmentIdx, globalIdx, args... ) )
                break;
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
    }
    else
    {
-      const IndexType storageSize = this->getStorageSize();
-      const IndexType alignedSize = this->alignedSize;
-      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) {
-         const IndexType begin = i;
-         const IndexType end = storageSize;
-         for( IndexType j = begin; j < end; j += alignedSize )
-            if( ! f( i, j, args... ) )
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) {
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
+         const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize )
+            if( ! f( segmentIdx, globalIdx, args... ) )
                break;
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -197,7 +256,7 @@ void
 SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
 forAll( Function& f, Args... args ) const
 {
-   this->forSegments( 0, this->getSize(), f, args... );
+   this->forSegments( 0, this->getSegmentsCount(), f, args... );
 }
 
 template< typename Device,
@@ -209,32 +268,36 @@ void
 SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
+   using RealType = decltype( fetch( IndexType(), IndexType() ) );
+   const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
+   const auto sliceOffsets_view = this->sliceOffsets.getConstView();
    if( RowMajorOrder )
    {
-      using RealType = decltype( fetch( IndexType(), IndexType() ) );
-      const IndexType segmentSize = this->segmentSize;
-      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
-         const IndexType begin = i * segmentSize;
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
          const IndexType end = begin + segmentSize;
          RealType aux( zero );
-         for( IndexType j = begin; j < end; j++  )
-            reduction( aux, fetch( i, j, args... ) );
-         keeper( i, aux );
+         for( IndexType globalIdx = begin; globalIdx< end; globalIdx++  )
+            reduction( aux, fetch( segmentIdx, globalIdx, args... ) );
+         keeper( segmentIdx, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
    }
    else
    {
-      using RealType = decltype( fetch( IndexType(), IndexType() ) );
-      const IndexType storageSize = this->getStorageSize();
-      const IndexType alignedSize = this->alignedSize;
-      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
-         const IndexType begin = i;
-         const IndexType end = storageSize;
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
+         const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
          RealType aux( zero );
-         for( IndexType j = begin; j < end; j += alignedSize  )
-            reduction( aux, fetch( i, j, args... ) );
-         keeper( i, aux );
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize  )
+            reduction( aux, fetch( segmentIdx, globalIdx, args... ) );
+         keeper( segmentIdx, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
    }
@@ -249,7 +312,7 @@ void
 SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
 allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... );
+   this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
 }
 
 template< typename Device,
@@ -260,9 +323,11 @@ void
 SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
 save( File& file ) const
 {
-   file.save( &segmentSize );
    file.save( &size );
    file.save( &alignedSize );
+   file.save( &segmentsCount );
+   this->sliceOffsets.save( file );
+   this->sliceSegmentSizes.save( file );
 }
 
 template< typename Device,
@@ -273,9 +338,11 @@ void
 SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
 load( File& file )
 {
-   file.load( &segmentSize );
    file.load( &size );
    file.load( &alignedSize );
+   file.load( &segmentsCount );
+   this->sliceOffsets.load( file );
+   this->sliceSegmentSizes.load( file );
 }
 
       } // namespace Segments
-- 
GitLab


From 258f4d760ed9c155bbcd750e1792fe1f7f72ea9e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 9 Dec 2019 20:07:57 +0100
Subject: [PATCH 019/179] Added SlicedEllpack segments unit tests.

---
 .../Containers/Segments/CMakeLists.txt        | 12 ++--
 .../Containers/Segments/SegmentsTest.hpp      | 10 ++-
 .../Segments/SegmentsTest_SlicedEllpack.cpp   |  1 +
 .../Segments/SegmentsTest_SlicedEllpack.cu    |  1 +
 .../Segments/SegmentsTest_SlicedEllpack.h     | 64 +++++++++++++++++++
 5 files changed, 79 insertions(+), 9 deletions(-)
 create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cpp
 create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cu
 create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h

diff --git a/src/UnitTests/Containers/Segments/CMakeLists.txt b/src/UnitTests/Containers/Segments/CMakeLists.txt
index 6304a4998..742fb69ef 100644
--- a/src/UnitTests/Containers/Segments/CMakeLists.txt
+++ b/src/UnitTests/Containers/Segments/CMakeLists.txt
@@ -14,8 +14,8 @@ IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( SegmentsTest_Ellpack SegmentsTest_Ellpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SegmentsTest_Ellpack ${GTEST_BOTH_LIBRARIES} )
 
-#   CUDA_ADD_EXECUTABLE( SegmentsTest_SlicedEllpack SegmentsTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
-#   TARGET_LINK_LIBRARIES( SegmentsTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
+   CUDA_ADD_EXECUTABLE( SegmentsTest_SlicedEllpack SegmentsTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SegmentsTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
 
 ELSE(  BUILD_CUDA )
 #   ADD_EXECUTABLE( SegmentsTest_AdEllpack SegmentsTest_AdEllpack.cpp )
@@ -38,9 +38,9 @@ ELSE(  BUILD_CUDA )
    TARGET_COMPILE_OPTIONS( SegmentsTest_Ellpack PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SegmentsTest_Ellpack ${GTEST_BOTH_LIBRARIES} )
 
-#   ADD_EXECUTABLE( SegmentsTest_SlicedEllpack SegmentsTest_SlicedEllpack.cpp )
-#   TARGET_COMPILE_OPTIONS( SegmentsTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} )
-#   TARGET_LINK_LIBRARIES( SegmentsTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
+   ADD_EXECUTABLE( SegmentsTest_SlicedEllpack SegmentsTest_SlicedEllpack.cpp )
+   TARGET_COMPILE_OPTIONS( SegmentsTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SegmentsTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
 ENDIF( BUILD_CUDA )
 
 
@@ -48,5 +48,5 @@ ENDIF( BUILD_CUDA )
 #ADD_TEST( SegmentsTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SegmentsTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_CSR${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SegmentsTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} )
-#ADD_TEST( SegmentsTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( SegmentsTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
 
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
index 484b92eb4..acc75655f 100644
--- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp
+++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
@@ -40,7 +40,6 @@ void test_SetSegmentsSizes_EqualSizes()
    EXPECT_EQ( segments2.getSegmentsCount(), segmentsCount );
    EXPECT_EQ( segments2.getSize(), segmentsCount * segmentSize );
    EXPECT_LE( segments2.getSize(), segments2.getStorageSize() );
-
    for( IndexType i = 0; i < segmentsCount; i++ )
       EXPECT_EQ( segments2.getSegmentSize( i ), segmentSize );
 
@@ -100,7 +99,6 @@ void test_AllReduction_MaximumInSegments()
 
    const IndexType segmentsCount = 20;
    const IndexType segmentSize = 5;
-   const IndexType size = segmentsCount * segmentSize;
 
    TNL::Containers::Vector< IndexType, DeviceType, IndexType > segmentsSizes( segmentsCount );
    segmentsSizes = segmentSize;
@@ -113,6 +111,13 @@ void test_AllReduction_MaximumInSegments()
    for( IndexType i = 0; i < segmentsCount; i++ )
       for( IndexType j = 0; j < segmentSize; j++ )
          v.setElement( segments.getGlobalIndex( i, j ), k++ );
+   /*auto view = v.getView();
+   auto init = [=] __cuda_callable__ ( const IndexType i, const IndexType j ) mutable -> bool {
+      view[ j ] =  j + 1;
+      return true;
+   };
+   segments.forAll( init );
+   std::cerr << v << std::endl;*/
 
    TNL::Containers::Vector< IndexType, DeviceType, IndexType >result( segmentsCount );
 
@@ -129,7 +134,6 @@ void test_AllReduction_MaximumInSegments()
    };
    segments.allReduction( fetch, reduce, keep, std::numeric_limits< IndexType >::min() );
 
-   std::cerr << result << std::endl;
    for( IndexType i = 0; i < segmentsCount; i++ )
       EXPECT_EQ( result.getElement( i ), ( i + 1 ) * segmentSize );
 }
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cpp b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cpp
new file mode 100644
index 000000000..cd9865f28
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cpp
@@ -0,0 +1 @@
+#include "SegmentsTest_SlicedEllpack.h"
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cu b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cu
new file mode 100644
index 000000000..cd9865f28
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cu
@@ -0,0 +1 @@
+#include "SegmentsTest_SlicedEllpack.h"
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h
new file mode 100644
index 000000000..1bcff3191
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h
@@ -0,0 +1,64 @@
+/***************************************************************************
+                          SegmentsTest_SlicedEllpack.h -  description
+                             -------------------
+    begin                : Dec 9, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Segments/SlicedEllpack.h>
+
+#include "SegmentsTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+// test fixture for typed tests
+template< typename Segments >
+class SlicedEllpackSegmentsTest : public ::testing::Test
+{
+protected:
+   using SlicedEllpackSegmentsType = Segments;
+};
+
+// types for which MatrixTest is instantiated
+using SlicedEllpackSegmentsTypes = ::testing::Types
+<
+    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, int    >,
+    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, long   >,
+    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, int    >,
+    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, long   >,
+    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, int    >,
+    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, long   >
+#ifdef HAVE_CUDA
+   ,TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, int    >,
+    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, long   >,
+    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, int    >,
+    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, long   >,
+    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, int    >,
+    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, long   >,
+#endif
+>;
+
+TYPED_TEST_SUITE( SlicedEllpackSegmentsTest, SlicedEllpackSegmentsTypes );
+
+TYPED_TEST( SlicedEllpackSegmentsTest, setSegmentsSizes_EqualSizes )
+{
+    using SlicedEllpackSegmentsType = typename TestFixture::SlicedEllpackSegmentsType;
+
+    test_SetSegmentsSizes_EqualSizes< SlicedEllpackSegmentsType >();
+}
+
+TYPED_TEST( SlicedEllpackSegmentsTest, allReduction_MaximumInSegments )
+{
+    using SlicedEllpackSegmentsType = typename TestFixture::SlicedEllpackSegmentsType;
+
+    test_AllReduction_MaximumInSegments< SlicedEllpackSegmentsType >();
+}
+
+#endif
+
+#include "../../main.h"
-- 
GitLab


From 2ee53835a219ef29baceeb1d8334a436dde27dc9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 9 Dec 2019 21:10:05 +0100
Subject: [PATCH 020/179] Added segments based SlicedEllpack sparse matrix unit
 test.

---
 src/TNL/Containers/Segments/SlicedEllpack.hpp |  8 +-
 src/UnitTests/Matrices/CMakeLists.txt         |  7 ++
 .../SparseMatrixTest_SlicedEllpack.cpp        |  2 +-
 .../SparseMatrixTest_SlicedEllpack.cu         |  2 +-
 .../Matrices/SparseMatrixTest_SlicedEllpack.h | 73 +++++++++++--------
 5 files changed, 56 insertions(+), 36 deletions(-)

diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index e23ee5f15..c8e74ec59 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -326,8 +326,8 @@ save( File& file ) const
    file.save( &size );
    file.save( &alignedSize );
    file.save( &segmentsCount );
-   this->sliceOffsets.save( file );
-   this->sliceSegmentSizes.save( file );
+   file << this->sliceOffsets;
+   file << this->sliceSegmentSizes;
 }
 
 template< typename Device,
@@ -341,8 +341,8 @@ load( File& file )
    file.load( &size );
    file.load( &alignedSize );
    file.load( &segmentsCount );
-   this->sliceOffsets.load( file );
-   this->sliceSegmentSizes.load( file );
+   file >> this->sliceOffsets;
+   file >> this->sliceSegmentSizes;
 }
 
       } // namespace Segments
diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index ef1f04371..9b168bd56 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -34,6 +34,9 @@ IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( SparseMatrixTest_Ellpack_segments SparseMatrixTest_Ellpack_segments.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack_segments ${GTEST_BOTH_LIBRARIES} )
 
+   CUDA_ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack_segments SparseMatrixTest_SlicedEllpack_segments.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack_segments ${GTEST_BOTH_LIBRARIES} )
+
 ELSE(  BUILD_CUDA )
    ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp )
    TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
@@ -81,6 +84,9 @@ ELSE(  BUILD_CUDA )
    TARGET_COMPILE_OPTIONS( SparseMatrixTest_Ellpack_segments PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack_segments ${GTEST_BOTH_LIBRARIES} )
 
+   ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack_segments SparseMatrixTest_SlicedEllpack_segments.cpp )
+   TARGET_COMPILE_OPTIONS( SparseMatrixTest_SlicedEllpack_segments PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack_segments ${GTEST_BOTH_LIBRARIES} )
 
 ENDIF( BUILD_CUDA )
 
@@ -101,6 +107,7 @@ ADD_TEST( SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixT
 # Segments tests
 ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest_Ellpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( SparseMatrixTest_SlicedEllpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack_segments${CMAKE_EXECUTABLE_SUFFIX} )
 
 if( ${BUILD_MPI} )
    if( BUILD_CUDA )
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp
index 40e2e94b8..a88301100 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp
@@ -1 +1 @@
-#include "SparseMatrixTest_SlicedEllpack.h"
+#include "SparseMatrixTest_SlicedEllpack_segments.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu
index 40e2e94b8..a88301100 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu
@@ -1 +1 @@
-#include "SparseMatrixTest_SlicedEllpack.h"
+#include "SparseMatrixTest_SlicedEllpack_segments.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
index 0798f59dc..00184754c 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
@@ -1,14 +1,16 @@
 /***************************************************************************
-                          SparseMatrixTest_SlicedEllpack.h -  description
+                          SparseMatrixTest_SlicedEllpack_segments.h -  description
                              -------------------
-    begin                : Nov 2, 2018
-    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    begin                : Dec 9, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Matrices/SlicedEllpack.h>
+#include <TNL/Containers/Segments/SlicedEllpack.h>
+#include <TNL/Matrices/SparseMatrix.h>
+
 
 #include "SparseMatrixTest.hpp"
 #include <iostream>
@@ -24,38 +26,49 @@ protected:
    using SlicedEllpackMatrixType = Matrix;
 };
 
+////
+// Row-major format is used for the host system
+template< typename Device, typename Index >
+using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, true, 32 >;
+
+
+////
+// Column-major format is used for GPUs
+template< typename Device, typename Index >
+using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, false, 32 >;
+
 // types for which MatrixTest is instantiated
 using SlicedEllpackMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::SlicedEllpack< int,    TNL::Devices::Host, short >,
-    TNL::Matrices::SlicedEllpack< long,   TNL::Devices::Host, short >,
-    TNL::Matrices::SlicedEllpack< float,  TNL::Devices::Host, short >,
-    TNL::Matrices::SlicedEllpack< double, TNL::Devices::Host, short >,
-    TNL::Matrices::SlicedEllpack< int,    TNL::Devices::Host, int >,
-    TNL::Matrices::SlicedEllpack< long,   TNL::Devices::Host, int >,
-    TNL::Matrices::SlicedEllpack< float,  TNL::Devices::Host, int >,
-    TNL::Matrices::SlicedEllpack< double, TNL::Devices::Host, int >,
-    TNL::Matrices::SlicedEllpack< int,    TNL::Devices::Host, long >,
-    TNL::Matrices::SlicedEllpack< long,   TNL::Devices::Host, long >,
-    TNL::Matrices::SlicedEllpack< float,  TNL::Devices::Host, long >,
-    TNL::Matrices::SlicedEllpack< double, TNL::Devices::Host, long >
+    TNL::Matrices::SparseMatrix< int,     RowMajorSlicedEllpack, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< long,    RowMajorSlicedEllpack, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< float,   RowMajorSlicedEllpack, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< double,  RowMajorSlicedEllpack, TNL::Devices::Host, short >,
+    TNL::Matrices::SparseMatrix< int,     RowMajorSlicedEllpack, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< long,    RowMajorSlicedEllpack, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< float,   RowMajorSlicedEllpack, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< double,  RowMajorSlicedEllpack, TNL::Devices::Host, int   >,
+    TNL::Matrices::SparseMatrix< int,     RowMajorSlicedEllpack, TNL::Devices::Host, long  >,
+    TNL::Matrices::SparseMatrix< long,    RowMajorSlicedEllpack, TNL::Devices::Host, long  >,
+    TNL::Matrices::SparseMatrix< float,   RowMajorSlicedEllpack, TNL::Devices::Host, long  >,
+    TNL::Matrices::SparseMatrix< double,  RowMajorSlicedEllpack, TNL::Devices::Host, long  >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::SlicedEllpack< int,    TNL::Devices::Cuda, short >,
-    TNL::Matrices::SlicedEllpack< long,   TNL::Devices::Cuda, short >,
-    TNL::Matrices::SlicedEllpack< float,  TNL::Devices::Cuda, short >,
-    TNL::Matrices::SlicedEllpack< double, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SlicedEllpack< int,    TNL::Devices::Cuda, int >,
-    TNL::Matrices::SlicedEllpack< long,   TNL::Devices::Cuda, int >,
-    TNL::Matrices::SlicedEllpack< float,  TNL::Devices::Cuda, int >,
-    TNL::Matrices::SlicedEllpack< double, TNL::Devices::Cuda, int >,
-    TNL::Matrices::SlicedEllpack< int,    TNL::Devices::Cuda, long >,
-    TNL::Matrices::SlicedEllpack< long,   TNL::Devices::Cuda, long >,
-    TNL::Matrices::SlicedEllpack< float,  TNL::Devices::Cuda, long >,
-    TNL::Matrices::SlicedEllpack< double, TNL::Devices::Cuda, long >
+   ,TNL::Matrices::SparseMatrix< int,     ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< long,    ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< float,   ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< double,  ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >,
+    TNL::Matrices::SparseMatrix< int,     ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< long,    ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< float,   ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< double,  ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int   >,
+    TNL::Matrices::SparseMatrix< int,     ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long  >,
+    TNL::Matrices::SparseMatrix< long,    ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long  >,
+    TNL::Matrices::SparseMatrix< float,   ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long  >,
+    TNL::Matrices::SparseMatrix< double,  ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long  >
 #endif
 >;
 
-TYPED_TEST_SUITE( SlicedEllpackMatrixTest, SlicedEllpackMatrixTypes );
+TYPED_TEST_SUITE( SlicedEllpackMatrixTest, SlicedEllpackMatrixTypes);
 
 TYPED_TEST( SlicedEllpackMatrixTest, setDimensionsTest )
 {
@@ -124,7 +137,7 @@ TYPED_TEST( SlicedEllpackMatrixTest, saveAndLoadTest )
 {
     using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
 
-    test_SaveAndLoad< SlicedEllpackMatrixType >( "test_SparseMatrixTest_SlicedEllpack" );
+    test_SaveAndLoad< SlicedEllpackMatrixType >( "test_SparseMatrixTest_SlicedEllpack_segments" );
 }
 
 TYPED_TEST( SlicedEllpackMatrixTest, printTest )
-- 
GitLab


From 40db8e95f91a0c69ca485d45f7565cb33ac1d2ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 9 Dec 2019 22:17:47 +0100
Subject: [PATCH 021/179] Implementing
 SparseMatrix::getNumberOfNonzeroMatrixElements.

---
 src/TNL/Matrices/SparseMatrix.h   | 2 +-
 src/TNL/Matrices/SparseMatrix.hpp | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 6b6a58f9a..b510636f5 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -17,7 +17,7 @@ namespace TNL {
 namespace Matrices {
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename Device_, typename Index_ > class Segments,
           typename Device = Devices::Host,
           typename Index = int,
           typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >,
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 37f59c058..1c243bcea 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -10,7 +10,9 @@
 
 #pragma once
 
+#include <functional>
 #include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Algorithms/Reduction.h>
 
 namespace TNL {
 namespace Matrices {
@@ -192,6 +194,12 @@ Index
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 getNumberOfNonzeroMatrixElements() const
 {
+   const auto columns_view = this->columnIndexes.getConstView();
+   const IndexType paddingIndex = this->getPaddingIndex();
+   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+      return ( columns_view[ i ] != paddingIndex );
+   };
+   return Algorithms::Reduction< DeviceType >::reduce( this->columnIndexes.getSize(), std::plus<>{}, fetch, 0 );
 }
 
 template< typename Real,
-- 
GitLab


From 21bd25d4f536738eee8d28a641c1742525ded11f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 9 Dec 2019 22:31:22 +0100
Subject: [PATCH 022/179] Added unit test for
 SparseMatrix::getNumberOfNonzeroMatrixElements.

---
 src/UnitTests/Matrices/SparseMatrixTest.hpp | 69 +++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp
index ef5b28d24..5dcd96ebc 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp
@@ -150,6 +150,75 @@ void test_SetLike()
     EXPECT_EQ( m1.getColumns(), m2.getColumns() );
 }
 
+template< typename Matrix >
+void test_GetNumberOfNonzeroMatrixElements()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  2  0  3  0  4  0  0  0  \
+    *    |  5  6  7  0  0  0  0  0  0  0  |
+    *    |  8  9 10 11 12 13 14 15  0  0  |
+    *    | 16 17  0  0  0  0  0  0  0  0  |
+    *    | 18  0  0  0  0  0  0  0  0  0  |
+    *    | 19  0  0  0  0  0  0  0  0  0  |
+    *    | 20  0  0  0  0  0  0  0  0  0  |
+    *    | 21  0  0  0  0  0  0  0  0  0  |
+    *    | 22 23 24 25 26 27 28 29 30 31  |
+    *    \ 32 33 34 35 36 37 38 39 40 41 /
+    */
+
+   const IndexType rows = 10;
+   const IndexType cols = 10;
+    
+   Matrix m;
+   m.reset();
+
+   m.setDimensions( rows, cols );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setElement( 0, 4 );
+   rowLengths.setElement( 1, 3 );
+   rowLengths.setElement( 2, 8 );
+   rowLengths.setElement( 3, 2 );
+   for( IndexType i = 4; i < rows - 2; i++ )
+   {
+      rowLengths.setElement( i, 1 );
+   }
+   rowLengths.setElement( 8, 10 );
+   rowLengths.setElement( 9, 10 );
+   m.setCompressedRowLengths( rowLengths );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < 4; i++ )
+      m.setElement( 0, 2 * i, value++ );
+
+   for( IndexType i = 0; i < 3; i++ )
+      m.setElement( 1, i, value++ );
+
+   for( IndexType i = 0; i < 8; i++ )
+      m.setElement( 2, i, value++ );
+
+   for( IndexType i = 0; i < 2; i++ )
+      m.setElement( 3, i, value++ );
+
+   for( IndexType i = 4; i < 8; i++ )
+      m.setElement( i, 0, value++ );
+
+   for( IndexType j = 8; j < rows; j++)
+   {
+      for( IndexType i = 0; i < cols; i++ )
+         m.setElement( j, i, value++ );
+   }
+
+   EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 );
+}
+
 template< typename Matrix >
 void test_Reset()
 {
-- 
GitLab


From 93c68aef6be53f00b523519d9fc7b4e3e921974c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 9 Dec 2019 22:51:45 +0100
Subject: [PATCH 023/179] Added error messages to matrix reader.

---
 src/TNL/Matrices/MatrixReader_impl.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h
index d00fdb904..70fd06d36 100644
--- a/src/TNL/Matrices/MatrixReader_impl.h
+++ b/src/TNL/Matrices/MatrixReader_impl.h
@@ -55,7 +55,10 @@ bool MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file,
    bool symmetricMatrix( false );
 
    if( ! readMtxHeader( file, rows, columns, symmetricMatrix, verbose ) )
+   {
+      std::cerr << "Unable to read MTX file header." << std::endl;
       return false;
+   }
 
    if( symReader && !symmetricMatrix )
    {
@@ -67,12 +70,18 @@ bool MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file,
    rowLengths.setSize( rows );
 
    if( ! computeCompressedRowLengthsFromMtxFile( file, rowLengths, columns, rows, symmetricMatrix, verbose ) )
+   {
+      std::cerr << "Unable to compute compressed row lengths." << std::endl;
       return false;
+   }
 
    matrix.setCompressedRowLengths( rowLengths );
 
    if( ! readMatrixElementsFromMtxFile( file, matrix, symmetricMatrix, verbose, symReader ) )
+   {
+      std::cerr << "Unable to read matrix elements from MTX file," << std::endl;
       return false;
+   }
    return true;
 }
 
@@ -84,7 +93,10 @@ bool MatrixReader< Matrix >::verifyMtxFile( std::istream& file,
    bool symmetricMatrix( false );
    IndexType rows, columns;
    if( ! readMtxHeader( file, rows, columns, symmetricMatrix, false ) )
+   {
+      std::cerr << "Unable to read MTX file header." << std::endl;
       return false;
+   }
    file.clear();
    file.seekg( 0, std::ios::beg );
    String line;
@@ -103,7 +115,10 @@ bool MatrixReader< Matrix >::verifyMtxFile( std::istream& file,
       IndexType row( 1 ), column( 1 );
       RealType value;
       if( ! parseMtxLineWithElement( line, row, column, value ) )
+      {
+         std::cerr << "Unable to parse MTX file line." << std::endl;
          return false;
+      }
       if( value != matrix.getElement( row-1, column-1 ) ||
           ( symmetricMatrix && value != matrix.getElement( column-1, row-1 ) ) )
       {
-- 
GitLab


From 51884a04fead8bd5e5f81f421cc4346a85a029c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 9 Dec 2019 22:52:14 +0100
Subject: [PATCH 024/179] Debuging SpMV benchmark.

---
 src/Benchmarks/SpMV/spmv.h               | 86 +++++++++++++-----------
 src/Benchmarks/SpMV/tnl-benchmark-spmv.h | 10 +--
 2 files changed, 53 insertions(+), 43 deletions(-)

diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 6bfee5ffe..7e3928e09 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -25,6 +25,11 @@
 #include <TNL/Matrices/BiEllpack.h>
 
 #include <TNL/Matrices/MatrixReader.h>
+
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Containers/Segments/SlicedEllpack.h>
 using namespace TNL::Matrices;
 
 #include "cusparseCSRMatrix.h"
@@ -36,6 +41,22 @@ namespace Benchmarks {
 template< typename Real, typename Device, typename Index >
 using SlicedEllpackAlias = Matrices::SlicedEllpack< Real, Device, Index >;
 
+// Segments based sparse matrix aliases
+template< typename Real, typename Device, typename Index >
+using SparseMatrix_CSR = Matrices::SparseMatrix< Real, Containers::Segments::CSR, Device, Index >;
+
+template< typename Device, typename Index >
+using EllpackSegments = Containers::Segments::Ellpack< Device, Index >;
+
+template< typename Real, typename Device, typename Index >
+using SparseMatrix_Ellpack = Matrices::SparseMatrix< Real, EllpackSegments, Device, Index >;
+
+template< typename Device, typename Index >
+using SlicedEllpackSegments = Containers::Segments::SlicedEllpack< Device, Index >;
+
+template< typename Real, typename Device, typename Index >
+using SparseMatrix_SlicedEllpack = Matrices::SparseMatrix< Real, SlicedEllpackSegments, Device, Index >;
+
 // Get the name (with extension) of input matrix file
 std::string getMatrixFileName( const String& InputFileName )
 {
@@ -85,7 +106,7 @@ void printMatrixInfo( const Matrix& matrix,
 template< typename Real,
           template< typename, typename, typename > class Matrix,
           template< typename, typename, typename, typename > class Vector = Containers::Vector >
-bool
+void
 benchmarkSpMV( Benchmark& benchmark,
                const String& inputFileName,
                bool verboseMR )
@@ -98,19 +119,10 @@ benchmarkSpMV( Benchmark& benchmark,
     CSR_DeviceMatrix CSRdeviceMatrix;
 
     // Read the matrix for CSR, to set up cuSPARSE
-    try
-      {
-         if( ! MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix, verboseMR ) )
-         {
-             throw std::bad_alloc();
-             return false;
-         }
-      }
-      catch( std::bad_alloc& e )
-      {
-          e.what();
-          return false;
-      }
+    if( ! MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix, verboseMR ) )
+    {
+        throw std::bad_alloc();
+    }
 
 #ifdef HAVE_CUDA
     // cuSPARSE handle setup
@@ -140,19 +152,10 @@ benchmarkSpMV( Benchmark& benchmark,
     CudaVector deviceVector, deviceVector2;
 
     // Load the format
-    try
-      {
-         if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ) )
-         {
-             throw std::bad_alloc();
-             return false;
-         }
-      }
-      catch( std::bad_alloc& e )
-      {
-          e.what();
-          return false;
-      }
+    if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ) )
+    {
+      throw std::bad_alloc();
+    }
 
 
     // Setup MetaData here (not in tnl-benchmark-spmv.h, as done in Benchmarks/BLAS),
@@ -244,7 +247,7 @@ benchmarkSpMV( Benchmark& benchmark,
 
     resultcuSPARSEDeviceVector2 = deviceVector2;
 
-    // Difference between GPU (curent format) and GPU-cuSPARSE results
+    // Difference between GPU (current format) and GPU-cuSPARSE results
     //Real cuSparseDifferenceAbsMax = resultDeviceVector2.differenceAbsMax( resultcuSPARSEDeviceVector2 );
     Real cuSparseDifferenceAbsMax = max( abs( resultDeviceVector2 - resultcuSPARSEDeviceVector2 ) );
     //Real cuSparseDifferenceLpNorm = resultDeviceVector2.differenceLpNorm( resultcuSPARSEDeviceVector2, 1 );
@@ -279,26 +282,33 @@ benchmarkSpMV( Benchmark& benchmark,
  #endif
 
     std::cout << std::endl;
-    return true;
 }
 
 template< typename Real = double,
           typename Index = int >
-bool
+void
 benchmarkSpmvSynthetic( Benchmark& benchmark,
                         const String& inputFileName,
                         bool verboseMR )
 {
-   bool result = true;
-   result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR );
-   result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR );
-   result |= benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR );
-   result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR );
+   benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR );
+   benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR );
+   benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR );
+
+   ////
+   // Segments based sparse matrices
+   std::cerr << "*********************************" << std::endl;
+   benchmarkSpMV< Real, SparseMatrix_CSR >( benchmark, inputFileName, verboseMR );
+   std::cerr << "*********************************" << std::endl;
+   benchmarkSpMV< Real, SparseMatrix_Ellpack >( benchmark, inputFileName, verboseMR );
+   std::cerr << "*********************************" << std::endl;
+   benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, inputFileName, verboseMR );
+   std::cerr << "*********************************" << std::endl;
 
    // AdEllpack is broken
-//   result |= benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR );
-   result |= benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR );
-   return result;
+   // benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR );
+   benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR );
 }
 
 } // namespace Benchmarks
diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
index 77c079c4c..65416f043 100644
--- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
+++ b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
@@ -96,11 +96,11 @@ main( int argc, char* argv[] )
    //           * The guide on what parameters to use prints twice.
    // FIXME: When ./tnl-benchmark-spmv-dbg is called with '--help':
    //           * The guide on what parameter to use print once. 
-   //               But then it CRASHES due to segfault:
-//                    The program attempts to get unknown parameter openmp-enabled
-//                    Aborting the program.
-//                    terminate called after throwing an instance of 'int'
-//                    [1]    17156 abort (core dumped)  ~/tnl-dev/Debug/bin/./tnl-benchmark-spmv-dbg --help
+   //              But then it CRASHES due to segfault:
+   //              The program attempts to get unknown parameter openmp-enabled
+   //              Aborting the program.
+   //              terminate called after throwing an instance of 'int'
+   //      [1]    17156 abort (core dumped)  ~/tnl-dev/Debug/bin/./tnl-benchmark-spmv-dbg --help
 
    if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) {
       conf_desc.printUsage( argv[ 0 ] );
-- 
GitLab


From 0b14ec6443093b8a063ded26365ed853961a6ffb Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 10 Dec 2019 15:31:44 +0100
Subject: [PATCH 025/179] Replacing error messages in MatrixReader with
 exceptions.

---
 src/Benchmarks/SpMV/spmv.h           |  10 +-
 src/TNL/Matrices/MatrixReader.h      |  20 ++--
 src/TNL/Matrices/MatrixReader_impl.h | 168 +++++++++------------------
 3 files changed, 67 insertions(+), 131 deletions(-)

diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 7e3928e09..a6acb52fd 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -119,10 +119,7 @@ benchmarkSpMV( Benchmark& benchmark,
     CSR_DeviceMatrix CSRdeviceMatrix;
 
     // Read the matrix for CSR, to set up cuSPARSE
-    if( ! MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix, verboseMR ) )
-    {
-        throw std::bad_alloc();
-    }
+    MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix, verboseMR );
 
 #ifdef HAVE_CUDA
     // cuSPARSE handle setup
@@ -152,10 +149,7 @@ benchmarkSpMV( Benchmark& benchmark,
     CudaVector deviceVector, deviceVector2;
 
     // Load the format
-    if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ) )
-    {
-      throw std::bad_alloc();
-    }
+    MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR );
 
 
     // Setup MetaData here (not in tnl-benchmark-spmv.h, as done in Benchmarks/BLAS),
diff --git a/src/TNL/Matrices/MatrixReader.h b/src/TNL/Matrices/MatrixReader.h
index aaf75a373..2c3cbb424 100644
--- a/src/TNL/Matrices/MatrixReader.h
+++ b/src/TNL/Matrices/MatrixReader.h
@@ -15,7 +15,7 @@
 #include <TNL/Containers/Vector.h>
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 template< typename Device >
 class MatrixReaderDeviceDependentCode
@@ -30,24 +30,24 @@ class MatrixReader
    typedef typename Matrix::DeviceType DeviceType;
    typedef typename Matrix::RealType RealType;
 
-   static bool readMtxFile( const String& fileName,
+   static void readMtxFile( const String& fileName,
                             Matrix& matrix,
                             bool verbose = false,
                             bool symReader = false );
 
-   static bool readMtxFile( std::istream& file,
+   static void readMtxFile( std::istream& file,
                             Matrix& matrix,
                             bool verbose = false,
                             bool symReader = false );
 
-   static bool readMtxFileHostMatrix( std::istream& file,
+   static void readMtxFileHostMatrix( std::istream& file,
                                       Matrix& matrix,
                                       typename Matrix::CompressedRowLengthsVector& rowLengths,
                                       bool verbose,
                                       bool symReader );
 
 
-   static bool verifyMtxFile( std::istream& file,
+   static void verifyMtxFile( std::istream& file,
                               const Matrix& matrix,
                               bool verbose = false );
 
@@ -58,16 +58,16 @@ class MatrixReader
                                   IndexType& lineNumber );
    protected:
 
-   static bool checkMtxHeader( const String& header,
+   static void checkMtxHeader( const String& header,
                                bool& symmetric );
 
-   static bool readMtxHeader( std::istream& file,
+   static void readMtxHeader( std::istream& file,
                               IndexType& rows,
                               IndexType& columns,
                               bool& symmetricMatrix,
                               bool verbose );
 
-   static bool computeCompressedRowLengthsFromMtxFile( std::istream& file,
+   static void computeCompressedRowLengthsFromMtxFile( std::istream& file,
                                              Containers::Vector< int, DeviceType, int >& rowLengths,
                                              const int columns,
                                              const int rows,
@@ -75,13 +75,13 @@ class MatrixReader
                                              bool verbose,
                                              bool symReader = false );
 
-   static bool readMatrixElementsFromMtxFile( std::istream& file,
+   static void readMatrixElementsFromMtxFile( std::istream& file,
                                               Matrix& matrix,
                                               bool symmetricMatrix,
                                               bool verbose,
                                               bool symReader );
 
-   static bool parseMtxLineWithElement( const String& line,
+   static void parseMtxLineWithElement( const String& line,
                                         IndexType& row,
                                         IndexType& column,
                                         RealType& value );
diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h
index 70fd06d36..25643d8c7 100644
--- a/src/TNL/Matrices/MatrixReader_impl.h
+++ b/src/TNL/Matrices/MatrixReader_impl.h
@@ -11,6 +11,7 @@
 #pragma once
 
 #include <iomanip>
+#include <sstream>
 #include <TNL/String.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Timer.h>
@@ -20,7 +21,7 @@ namespace TNL {
 namespace Matrices {
 
 template< typename Matrix >
-bool MatrixReader< Matrix >::readMtxFile( const String& fileName,
+void MatrixReader< Matrix >::readMtxFile( const String& fileName,
                                              Matrix& matrix,
                                              bool verbose,
                                              bool symReader )
@@ -28,24 +29,21 @@ bool MatrixReader< Matrix >::readMtxFile( const String& fileName,
    std::fstream file;
    file.open( fileName.getString(), std::ios::in );
    if( ! file )
-   {
-      std::cerr << "I am not able to open the file " << fileName << "." << std::endl;
-      return false;
-   }
-   return readMtxFile( file, matrix, verbose, symReader );
+      throw std::runtime_error( std::string( "I am not able to open the file " ) + fileName.getString() );
+   readMtxFile( file, matrix, verbose, symReader );
 }
 
 template< typename Matrix >
-bool MatrixReader< Matrix >::readMtxFile( std::istream& file,
+void MatrixReader< Matrix >::readMtxFile( std::istream& file,
                                              Matrix& matrix,
                                              bool verbose,
                                              bool symReader )
 {
-   return MatrixReaderDeviceDependentCode< typename Matrix::DeviceType >::readMtxFile( file, matrix, verbose, symReader );
+   MatrixReaderDeviceDependentCode< typename Matrix::DeviceType >::readMtxFile( file, matrix, verbose, symReader );
 }
 
 template< typename Matrix >
-bool MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file,
+void MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file,
                                                        Matrix& matrix,
                                                        typename Matrix::CompressedRowLengthsVector& rowLengths,
                                                        bool verbose,
@@ -54,17 +52,10 @@ bool MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file,
    IndexType rows, columns;
    bool symmetricMatrix( false );
 
-   if( ! readMtxHeader( file, rows, columns, symmetricMatrix, verbose ) )
-   {
-      std::cerr << "Unable to read MTX file header." << std::endl;
-      return false;
-   }
+   readMtxHeader( file, rows, columns, symmetricMatrix, verbose );
 
    if( symReader && !symmetricMatrix )
-   {
-      std::cout << "Matrix is not symmetric, but flag for symmetric matrix is given. Aborting." << std::endl;
-      return false;
-   }
+      throw std::runtime_error( "Matrix is not symmetric, but flag for symmetric matrix is given. Aborting." );
 
    matrix.setDimensions( rows, columns );
    rowLengths.setSize( rows );
@@ -75,28 +66,21 @@ bool MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file,
       return false;
    }
 
+   computeCompressedRowLengthsFromMtxFile( file, rowLengths, columns, rows, symmetricMatrix, verbose );
+
    matrix.setCompressedRowLengths( rowLengths );
 
-   if( ! readMatrixElementsFromMtxFile( file, matrix, symmetricMatrix, verbose, symReader ) )
-   {
-      std::cerr << "Unable to read matrix elements from MTX file," << std::endl;
-      return false;
-   }
-   return true;
+   readMatrixElementsFromMtxFile( file, matrix, symmetricMatrix, verbose, symReader );
 }
 
 template< typename Matrix >
-bool MatrixReader< Matrix >::verifyMtxFile( std::istream& file,
+void MatrixReader< Matrix >::verifyMtxFile( std::istream& file,
                                                const Matrix& matrix,
                                                bool verbose )
 {
    bool symmetricMatrix( false );
    IndexType rows, columns;
-   if( ! readMtxHeader( file, rows, columns, symmetricMatrix, false ) )
-   {
-      std::cerr << "Unable to read MTX file header." << std::endl;
-      return false;
-   }
+   readMtxHeader( file, rows, columns, symmetricMatrix, false );
    file.clear();
    file.seekg( 0, std::ios::beg );
    String line;
@@ -114,19 +98,16 @@ bool MatrixReader< Matrix >::verifyMtxFile( std::istream& file,
       }
       IndexType row( 1 ), column( 1 );
       RealType value;
-      if( ! parseMtxLineWithElement( line, row, column, value ) )
-      {
-         std::cerr << "Unable to parse MTX file line." << std::endl;
-         return false;
-      }
+      parseMtxLineWithElement( line, row, column, value );
       if( value != matrix.getElement( row-1, column-1 ) ||
           ( symmetricMatrix && value != matrix.getElement( column-1, row-1 ) ) )
       {
-         std::cerr << "*** !!! VERIFICATION ERROR !!! *** " << std::endl
-              << "The elements differ at " << row-1 << " row " << column-1 << " column." << std::endl
-              << "The matrix value is " << matrix.getElement( row-1, column-1 )
-              << " while the file value is " << value << "." << std::endl;
-         return false;
+         std::stringstream str;
+         str << "*** !!! VERIFICATION ERROR !!! *** " << std::endl
+             << "The elements differ at " << row-1 << " row " << column-1 << " column." << std::endl
+             << "The matrix value is " << matrix.getElement( row-1, column-1 )
+             << " while the file value is " << value << "." << std::endl;
+         throw std::runtime_error( str.str() );
       }
       processedElements++;
       if( symmetricMatrix && row != column )
@@ -141,7 +122,6 @@ bool MatrixReader< Matrix >::verifyMtxFile( std::istream& file,
      std::cout << " Verifying the matrix elements ... " << processedElements << " / " << matrix.getNumberOfMatrixElements()
            << " -> " << timer.getRealTime()
            << " sec. i.e. " << fileSize / ( timer.getRealTime() * ( 1 << 20 ))  << "MB/s." << std::endl;
-   return true;
 }
 
 template< typename Matrix >
@@ -167,8 +147,7 @@ bool MatrixReader< Matrix >::findLineByElement( std::istream& file,
       }
       IndexType currentRow( 1 ), currentColumn( 1 );
       RealType value;
-      if( ! parseMtxLineWithElement( line, currentRow, currentColumn, value ) )
-         return false;
+      parseMtxLineWithElement( line, currentRow, currentColumn, value );
       if( ( currentRow == row + 1 && currentColumn == column + 1 ) ||
           ( symmetricMatrix && currentRow == column + 1 && currentColumn == row + 1 ) )
          return true;
@@ -177,45 +156,30 @@ bool MatrixReader< Matrix >::findLineByElement( std::istream& file,
 }
 
 template< typename Matrix >
-bool MatrixReader< Matrix >::checkMtxHeader( const String& header,
+void MatrixReader< Matrix >::checkMtxHeader( const String& header,
                                                 bool& symmetric )
 {
    std::vector< String > parsedLine = header.split( ' ', String::SplitSkip::SkipEmpty );
-   if( (int) parsedLine.size() < 5 )
-      return false;
-   if( parsedLine[ 0 ] != "%%MatrixMarket" )
-      return false;
+   if( (int) parsedLine.size() < 5 || parsedLine[ 0 ] != "%%MatrixMarket" )
+      throw std::runtime_error( "Wrong MTX file header. We expect line like this: %%MatrixMarket matrix coordinate real general" );
    if( parsedLine[ 1 ] != "matrix" )
-   {
-      std::cerr << "Error: 'matrix' expected in the header line (" << header << ")." << std::endl;
-      return false;
-   }
+      throw std::runtime_error( std::string( "Keyword 'matrix' is expected in the header line: " ) + header.getString() );
    if( parsedLine[ 2 ] != "coordinates" &&
        parsedLine[ 2 ] != "coordinate" )
-   {
-      std::cerr << "Error: Only 'coordinates' format is supported now, not " << parsedLine[ 2 ] << "." << std::endl;
-      return false;
-   }
+      throw std::runtime_error( std::string( "Error: Only 'coordinates' format is supported now, not " ) + parsedLine[ 2 ].getString() );
    if( parsedLine[ 3 ] != "real" )
-   {
-      std::cerr << "Error: Only 'real' matrices are supported, not " << parsedLine[ 3 ] << "." << std::endl;
-      return false;
-   }
+      throw std::runtime_error( std::string( "Only 'real' matrices are supported, not " ) + parsedLine[ 3 ].getString() );
    if( parsedLine[ 4 ] != "general" )
    {
       if( parsedLine[ 4 ] == "symmetric" )
          symmetric = true;
       else
-      {
-         std::cerr << "Error: Only 'general' matrices are supported, not " << parsedLine[ 4 ] << "." << std::endl;
-         return false;
-      }
+         throw std::runtime_error(  std::string( "Only 'general' matrices are supported, not "  ) + parsedLine[ 4 ].getString() );
    }
-   return true;
 }
 
 template< typename Matrix >
-bool MatrixReader< Matrix >::readMtxHeader( std::istream& file,
+void MatrixReader< Matrix >::readMtxHeader( std::istream& file,
                                                IndexType& rows,
                                                IndexType& columns,
                                                bool& symmetric,
@@ -231,27 +195,18 @@ bool MatrixReader< Matrix >::readMtxHeader( std::istream& file,
       std::getline( file, line );
       if( ! headerParsed )
       {
-         headerParsed = checkMtxHeader( line, symmetric );
-         if( ! headerParsed )
-            return false;
+         checkMtxHeader( line, symmetric );
          if( verbose && symmetric )
            std::cout << "The matrix is SYMMETRIC ... ";
          continue;
       }
       if( line[ 0 ] == '%' ) continue;
       if( ! headerParsed )
-      {
-         std::cerr << "Unknown format of the file. We expect line like this:" << std::endl;
-         std::cerr << "%%MatrixMarket matrix coordinate real general" << std::endl;
-         return false;
-      }
+         throw std::runtime_error( "Unknown format of the file. We expect line like this: %%MatrixMarket matrix coordinate real general" );
 
       parsedLine = line.split( ' ', String::SplitSkip::SkipEmpty );
       if( (int) parsedLine.size() != 3 )
-      {
-         std::cerr << "Wrong number of parameters in the matrix header." << std::endl;
-         return false;
-      }
+         throw std::runtime_error( "Wrong number of parameters in the matrix header - should be 3." );
       rows = atoi( parsedLine[ 0 ].getString() );
       columns = atoi( parsedLine[ 1 ].getString() );
       if( verbose )
@@ -259,16 +214,12 @@ bool MatrixReader< Matrix >::readMtxHeader( std::istream& file,
               << " rows and " << columns << " columns. " << std::endl;
 
       if( rows <= 0 || columns <= 0 )
-      {
-         std::cerr << "Wrong parameters in the matrix header." << std::endl;
-         return false;
-      }
-      return true;
+         throw std::runtime_error( "Row or column index is negative."  );
    }
 }
 
 template< typename Matrix >
-bool MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istream& file,
+void MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istream& file,
                                                               Containers::Vector< int, DeviceType, int >& rowLengths,
                                                               const int columns,
                                                               const int rows,
@@ -294,13 +245,13 @@ bool MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istrea
       }
       IndexType row( 1 ), column( 1 );
       RealType value;
-      if( ! parseMtxLineWithElement( line, row, column, value ) )
-         return false;
+      parseMtxLineWithElement( line, row, column, value );
       numberOfElements++;
       if( column > columns || row > rows )
       {
-         std::cerr << "There is an element at position " << row << ", " << column << " out of the matrix dimensions " << rows << " x " << columns << "." << std::endl;
-         return false;
+         std::stringstream str;
+         str << "There is an element at position " << row << ", " << column << " out of the matrix dimensions " << rows << " x " << columns << ".";
+         throw std::runtime_error( str.str() );
       }
       if( verbose )
          std::cout << " Counting the matrix elements ... " << numberOfElements / 1000 << " thousands      \r" << std::flush;
@@ -313,23 +264,23 @@ bool MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istrea
 
       if( rowLengths[ row - 1 ] > columns )
       {
-         std::cerr << "There are more elements ( " << rowLengths[ row - 1 ] << " ) than the matrix columns ( " << columns << " ) at the row " << row << "." << std::endl;
-         return false;
+         std::stringstream str;
+         str << "There are more elements ( " << rowLengths[ row - 1 ] << " ) than the matrix columns ( " << columns << " ) at the row " << row << ".";
+         throw std::runtime_error( str.str() );
       }
       if( symmetricMatrix && row != column && symReader )
       {
          rowLengths[ column - 1 ]++;
          if( rowLengths[ column - 1 ] > columns )
          {
-            std::cerr << "There are more elements ( " << rowLengths[ row - 1 ] << " ) than the matrix columns ( " << columns << " ) at the row " << column << " ." << std::endl;
-            return false;
+            std::stringstream str;
+            str << "There are more elements ( " << rowLengths[ row - 1 ] << " ) than the matrix columns ( " << columns << " ) at the row " << column << " .";
+            throw std::runtime_error( str.str() );
          }
          continue;
       }
       else if( symmetricMatrix && row != column && !symReader )
-      {
           rowLengths[ column - 1 ]++;
-      }
    }
    file.clear();
    long int fileSize = file.tellg();
@@ -338,11 +289,10 @@ bool MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istrea
      std::cout << " Counting the matrix elements ... " << numberOfElements / 1000
            << " thousands  -> " << timer.getRealTime()
            << " sec. i.e. " << fileSize / ( timer.getRealTime() * ( 1 << 20 ))  << "MB/s." << std::endl;
-   return true;
 }
 
 template< typename Matrix >
-bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
+void MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
                                                                Matrix& matrix,
                                                                bool symmetricMatrix,
                                                                bool verbose,
@@ -366,8 +316,7 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
       }
       IndexType row( 1 ), column( 1 );
       RealType value;
-      if( ! parseMtxLineWithElement( line, row, column, value ) )
-         return false;
+      parseMtxLineWithElement( line, row, column, value );
 
       if( !symReader ||
           ( symReader && row >= column ) )
@@ -377,9 +326,7 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
 
       processedElements++;
       if( symmetricMatrix && row != column && symReader )
-      {
           continue;
-      }
       else if( symmetricMatrix && row != column && !symReader )
       {
           matrix.setElement( column - 1, row - 1, value );
@@ -394,12 +341,10 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
      std::cout << " Reading the matrix elements ... " << processedElements << " / " << matrix.getNumberOfMatrixElements()
               << " -> " << timer.getRealTime()
               << " sec. i.e. " << fileSize / ( timer.getRealTime() * ( 1 << 20 ))  << "MB/s." << std::endl;
-
-   return true;
 }
 
 template< typename Matrix >
-bool MatrixReader< Matrix >::parseMtxLineWithElement( const String& line,
+void MatrixReader< Matrix >::parseMtxLineWithElement( const String& line,
                                                          IndexType& row,
                                                          IndexType& column,
                                                          RealType& value )
@@ -407,13 +352,13 @@ bool MatrixReader< Matrix >::parseMtxLineWithElement( const String& line,
    std::vector< String > parsedLine = line.split( ' ', String::SplitSkip::SkipEmpty );
    if( (int) parsedLine.size() != 3 )
    {
-      std::cerr << "Wrong number of parameters in the matrix row at line:" << line << std::endl;
-      return false;
+      std::stringstream str;
+      str << "Wrong number of parameters in the matrix row at line:" << line;
+      throw std::runtime_error( str.str() );
    }
    row = atoi( parsedLine[ 0 ].getString() );
    column = atoi( parsedLine[ 1 ].getString() );
    value = ( RealType ) atof( parsedLine[ 2 ].getString() );
-   return true;
 }
 
 template<>
@@ -422,13 +367,13 @@ class MatrixReaderDeviceDependentCode< Devices::Host >
    public:
 
    template< typename Matrix >
-   static bool readMtxFile( std::istream& file,
+   static void readMtxFile( std::istream& file,
                             Matrix& matrix,
                             bool verbose,
                             bool symReader )
    {
       typename Matrix::CompressedRowLengthsVector rowLengths;
-      return MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader );
+      MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader );
    }
 };
 
@@ -438,7 +383,7 @@ class MatrixReaderDeviceDependentCode< Devices::Cuda >
    public:
 
    template< typename Matrix >
-   static bool readMtxFile( std::istream& file,
+   static void readMtxFile( std::istream& file,
                             Matrix& matrix,
                             bool verbose,
                             bool symReader )
@@ -448,10 +393,7 @@ class MatrixReaderDeviceDependentCode< Devices::Cuda >
 
       HostMatrixType hostMatrix;
       CompressedRowLengthsVector rowLengths;
-      return MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader );
-
-      matrix = hostMatrix;
-      return true;
+      MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader );
    }
 };
 
-- 
GitLab


From 369ae3ce33bca8f6446cf73fa33bff7dcbcbb571 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Tue, 10 Dec 2019 22:25:29 +0100
Subject: [PATCH 026/179] Fixing MatrixReader.

---
 src/TNL/Matrices/MatrixReader.h      | 2 +-
 src/TNL/Matrices/MatrixReader_impl.h | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/TNL/Matrices/MatrixReader.h b/src/TNL/Matrices/MatrixReader.h
index 2c3cbb424..ae0606678 100644
--- a/src/TNL/Matrices/MatrixReader.h
+++ b/src/TNL/Matrices/MatrixReader.h
@@ -58,7 +58,7 @@ class MatrixReader
                                   IndexType& lineNumber );
    protected:
 
-   static void checkMtxHeader( const String& header,
+   static bool checkMtxHeader( const String& header,
                                bool& symmetric );
 
    static void readMtxHeader( std::istream& file,
diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h
index 25643d8c7..476a7327e 100644
--- a/src/TNL/Matrices/MatrixReader_impl.h
+++ b/src/TNL/Matrices/MatrixReader_impl.h
@@ -156,12 +156,12 @@ bool MatrixReader< Matrix >::findLineByElement( std::istream& file,
 }
 
 template< typename Matrix >
-void MatrixReader< Matrix >::checkMtxHeader( const String& header,
+bool MatrixReader< Matrix >::checkMtxHeader( const String& header,
                                                 bool& symmetric )
 {
    std::vector< String > parsedLine = header.split( ' ', String::SplitSkip::SkipEmpty );
    if( (int) parsedLine.size() < 5 || parsedLine[ 0 ] != "%%MatrixMarket" )
-      throw std::runtime_error( "Wrong MTX file header. We expect line like this: %%MatrixMarket matrix coordinate real general" );
+      return false;
    if( parsedLine[ 1 ] != "matrix" )
       throw std::runtime_error( std::string( "Keyword 'matrix' is expected in the header line: " ) + header.getString() );
    if( parsedLine[ 2 ] != "coordinates" &&
@@ -176,6 +176,7 @@ void MatrixReader< Matrix >::checkMtxHeader( const String& header,
       else
          throw std::runtime_error(  std::string( "Only 'general' matrices are supported, not "  ) + parsedLine[ 4 ].getString() );
    }
+   return true;
 }
 
 template< typename Matrix >
@@ -195,7 +196,7 @@ void MatrixReader< Matrix >::readMtxHeader( std::istream& file,
       std::getline( file, line );
       if( ! headerParsed )
       {
-         checkMtxHeader( line, symmetric );
+         headerParsed = checkMtxHeader( line, symmetric );
          if( verbose && symmetric )
            std::cout << "The matrix is SYMMETRIC ... ";
          continue;
@@ -215,6 +216,7 @@ void MatrixReader< Matrix >::readMtxHeader( std::istream& file,
 
       if( rows <= 0 || columns <= 0 )
          throw std::runtime_error( "Row or column index is negative."  );
+      break;
    }
 }
 
-- 
GitLab


From a00c862ce14608a0c34078ea1711d19414fc2872 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Tue, 10 Dec 2019 22:26:37 +0100
Subject: [PATCH 027/179] Implementing SparseMatrix::rowsReduction and
 SparseMatrix::allRowsReduction.

---
 src/TNL/Matrices/SparseMatrix.h               |  17 ++-
 src/TNL/Matrices/SparseMatrix.hpp             |  51 ++++++-
 src/UnitTests/Matrices/SparseMatrixTest.hpp   | 135 +++++++++++++++---
 .../Matrices/SparseMatrixTest_CSR_segments.h  |   7 +
 .../SparseMatrixTest_Ellpack_segments.h       |   7 +
 .../SparseMatrixTest_SlicedEllpack_segments.h |   7 +
 6 files changed, 203 insertions(+), 21 deletions(-)

diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index b510636f5..268fba6d3 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -34,10 +34,17 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       using IndexType = Index;
       using RealAllocatorType = RealAllocator;
       using IndexAllocatorType = IndexAllocator;
-      using CompressedRowLengthsVectorView = Containers::VectorView< IndexType, DeviceType, IndexType >;
-      using ConstCompressedRowLengthsVectorView = typename CompressedRowLengthsVectorView::ConstViewType;
+      using RowsCapacitiesType = Containers::Vector< IndexType, DeviceType, IndexType >;
+      using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >;
+      using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
       using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector;
       using ColumnsVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
+      
+      // TODO: remove this - it is here only for compatibility with original matrix implementation
+      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
+      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
+      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
+
 
       SparseMatrix( const RealAllocatorType& realAllocator = RealAllocatorType(),
                     const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
@@ -158,6 +165,12 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
                              const RealType& matrixMultiplicator = 1.0 );
        */
 
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
       template< typename Vector1, typename Vector2 >
       bool performSORIteration( const Vector1& b,
                                 const IndexType row,
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 1c243bcea..e26693f6a 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -517,6 +517,44 @@ vectorProduct( const InVector& inVector,
    this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
 }
 
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
+void
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const
+{
+   const auto columns_view = this->columnIndexes.getConstView();
+   const auto values_view = this->values.getConstView();
+   const IndexType paddingIndex_ = this->getPaddingIndex();
+   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType globalIdx ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
+      IndexType columnIdx = columns_view[ globalIdx ];
+      if( columnIdx != paddingIndex_ )
+         return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
+      return zero;
+   };
+   this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+}
+
+
 /*template< typename Real,
           template< typename, typename > class Segments,
           typename Device,
@@ -576,7 +614,11 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >&
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 operator=( const SparseMatrix& matrix )
 {
-
+   Matrix< Real, Device, Index >::operator=( matrix );
+   this->columnIndexes = matrix.columnIndexes;
+   this->segments = matrix.segments;
+   this->indexAlloctor = matrix.indexAllocator;
+   this->realAllocator = matrix.realAllocator;
 }
 
 // cross-device copy assignment
@@ -596,7 +638,12 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >&
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix )
 {
-
+   if( std::is_same< Device, Device2 >::value )
+   {
+      
+   }
+      
+   
 }
 
 template< typename Real,
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp
index 5dcd96ebc..b366f4e2f 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp
@@ -908,18 +908,18 @@ void test_VectorProduct()
     EXPECT_EQ( outVector_4.getElement( 7 ), 330 );
     
   
-/*
- * Sets up the following 8x8 sparse matrix:
- *
- *    /  1  2  3  0  4  5  0  1 \   6
- *    |  0  6  0  7  0  0  0  1 |   3
- *    |  0  8  9  0 10  0  0  1 |   4
- *    |  0 11 12 13 14  0  0  1 |   5
- *    |  0 15  0  0  0  0  0  1 |   2
- *    |  0 16 17 18 19 20 21  1 |   7
- *    | 22 23 24 25 26 27 28  1 |   8
- *    \ 29 30 31 32 33 34 35 36 /   8
- */
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  2  3  0  4  5  0  1 \   6
+    *    |  0  6  0  7  0  0  0  1 |   3
+    *    |  0  8  9  0 10  0  0  1 |   4
+    *    |  0 11 12 13 14  0  0  1 |   5
+    *    |  0 15  0  0  0  0  0  1 |   2
+    *    |  0 16 17 18 19 20 21  1 |   7
+    *    | 22 23 24 25 26 27 28  1 |   8
+    *    \ 29 30 31 32 33 34 35 36 /   8
+    */
 
     const IndexType m_rows_5 = 8;
     const IndexType m_cols_5 = 8;
@@ -970,20 +970,18 @@ void test_VectorProduct()
 
     for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
         m_5.setElement( i, 7, 1);
-    
+
     VectorType inVector_5;
     inVector_5.setSize( m_cols_5 );
-    for( IndexType i = 0; i < inVector_5.getSize(); i++ )        
+    for( IndexType i = 0; i < inVector_5.getSize(); i++ )
         inVector_5.setElement( i, 2 );
 
     VectorType outVector_5;  
     outVector_5.setSize( m_rows_5 );
     for( IndexType j = 0; j < outVector_5.getSize(); j++ )
         outVector_5.setElement( j, 0 );
-    
-    
+
     m_5.vectorProduct( inVector_5, outVector_5 );
-    
 
     EXPECT_EQ( outVector_5.getElement( 0 ),  32 );
     EXPECT_EQ( outVector_5.getElement( 1 ),  28 );
@@ -995,6 +993,109 @@ void test_VectorProduct()
     EXPECT_EQ( outVector_5.getElement( 7 ), 520 );
 }
 
+template< typename Matrix >
+void test_RowsReduction()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  2  3  0  4  5  0  1 \   6
+    *    |  0  6  0  7  0  0  0  1 |   3
+    *    |  0  8  9  0 10  0  0  1 |   4
+    *    |  0 11 12 13 14  0  0  1 |   5
+    *    |  0 15  0  0  0  0  0  1 |   2
+    *    |  0 16 17 18 19 20 21  1 |   7
+    *    | 22 23 24 25 26 27 28  1 |   8
+    *    \ 29 30 31 32 33 34 35 36 /   8
+    */
+
+   const IndexType rows = 8;
+   const IndexType cols = 8;
+
+   Matrix m;
+   m.setDimensions( rows, cols );
+   typename Matrix::RowsCapacitiesType rowsCapacities( rows );
+   //rowLengths.setSize( rows );
+   rowsCapacities.setElement(0, 6);
+   rowsCapacities.setElement(1, 3);
+   rowsCapacities.setElement(2, 4);
+   rowsCapacities.setElement(3, 5);
+   rowsCapacities.setElement(4, 2);
+   rowsCapacities.setElement(5, 7);
+   rowsCapacities.setElement(6, 8);
+   rowsCapacities.setElement(7, 8);
+   m.setCompressedRowLengths( rowsCapacities );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m.setElement( 0, i, value++ );
+
+   m.setElement( 0, 4, value++ );           // 0th row
+   m.setElement( 0, 5, value++ );
+
+   m.setElement( 1, 1, value++ );           // 1st row
+   m.setElement( 1, 3, value++ );
+
+   for( IndexType i = 1; i < 3; i++ )            // 2nd row
+      m.setElement( 2, i, value++ );
+
+   m.setElement( 2, 4, value++ );           // 2nd row
+
+   for( IndexType i = 1; i < 5; i++ )            // 3rd row
+      m.setElement( 3, i, value++ );
+
+   m.setElement( 4, 1, value++ );           // 4th row
+
+   for( IndexType i = 1; i < 7; i++ )            // 5th row
+      m.setElement( 5, i, value++ );
+
+   for( IndexType i = 0; i < 7; i++ )            // 6th row
+      m.setElement( 6, i, value++ );
+
+   for( IndexType i = 0; i < 8; i++ )            // 7th row
+       m.setElement( 7, i, value++ );
+
+   for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
+      m.setElement( i, 7, 1);
+
+   ////
+   // Compute number of non-zero elements in rows.
+   typename Matrix::RowsCapacitiesType rowLengths( rows );
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   m.allRowsReduction( fetch, reduce, keep, 0 );
+   EXPECT_EQ( rowsCapacities, rowLengths );
+
+   ////
+   // Compute max norm
+   TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows );
+   auto rowSums_view = rowSums.getView();
+   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return abs( value );
+   };
+   auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowSums_view[ rowIdx ] = value;
+   };
+   m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 );
+   const RealType maxNorm = TNL::max( rowSums );
+   EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36
+}
+
 template< typename Matrix >
 void test_PerformSORIteration()
 {
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
index bf4e452fa..0718e3a69 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
@@ -122,6 +122,13 @@ TYPED_TEST( CSRMatrixTest, vectorProductTest )
     test_VectorProduct< CSRMatrixType >();
 }
 
+TYPED_TEST( CSRMatrixTest, rowsReduction )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_RowsReduction< CSRMatrixType >();
+}
+
 TYPED_TEST( CSRMatrixTest, saveAndLoadTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
index edfe0bc28..2c0514c0a 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
@@ -133,6 +133,13 @@ TYPED_TEST( EllpackMatrixTest, vectorProductTest )
     test_VectorProduct< EllpackMatrixType >();
 }
 
+TYPED_TEST( EllpackMatrixTest, rowsReduction )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_RowsReduction< EllpackMatrixType >();
+}
+
 TYPED_TEST( EllpackMatrixTest, saveAndLoadTest )
 {
     using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
index 8d17b8be7..5efcb1eae 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
@@ -133,6 +133,13 @@ TYPED_TEST( SlicedEllpackMatrixTest, vectorProductTest )
     test_VectorProduct< SlicedEllpackMatrixType >();
 }
 
+TYPED_TEST( SlicedEllpackMatrixTest, rowsReduction )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_RowsReduction< SlicedEllpackMatrixType >();
+}
+
 TYPED_TEST( SlicedEllpackMatrixTest, saveAndLoadTest )
 {
     using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-- 
GitLab


From 8573f541dd5dd163e2f47b824787fe9464764c27 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 11 Dec 2019 18:00:42 +0100
Subject: [PATCH 028/179] Implementing sparse matrcies assignment.

---
 src/TNL/Containers/Segments/CSR.hpp           |  11 +-
 src/TNL/Containers/Segments/Ellpack.hpp       |  18 +-
 src/TNL/Containers/Segments/SlicedEllpack.hpp |   6 +-
 src/TNL/Matrices/Matrix.h                     |   9 +-
 src/TNL/Matrices/SparseMatrix.h               |  19 +-
 src/TNL/Matrices/SparseMatrix.hpp             |  88 +++-
 src/UnitTests/Matrices/SparseMatrixTest.hpp   | 494 +++++++++---------
 7 files changed, 374 insertions(+), 271 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index ef7431038..ccb483125 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -154,11 +154,12 @@ CSR< Device, Index >::
 forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 {
    const auto offsetsView = this->offsets.getConstView();
-   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
-      const IndexType begin = offsetsView[ i ];
-      const IndexType end = offsetsView[ i + 1 ];
-      for( IndexType j = begin; j < end; j++  )
-         if( ! f( i, j, args... ) )
+   auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+      const IndexType begin = offsetsView[ segmentIdx ];
+      const IndexType end = offsetsView[ segmentIdx + 1 ];
+      IndexType localIdx( 0 );
+      for( IndexType globalIdx = begin; globalIdx < end; globalIdx++  )
+         if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
             break;
    };
    Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index d3d90be5e..337009e99 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -192,11 +192,12 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
    if( RowMajorOrder )
    {
       const IndexType segmentSize = this->segmentSize;
-      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
-         const IndexType begin = i * segmentSize;
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType begin = segmentIdx * segmentSize;
          const IndexType end = begin + segmentSize;
-         for( IndexType j = begin; j < end; j++  )
-            if( ! f( i, j, args... ) )
+         IndexType localIdx( 0 );
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx++  )
+            if( ! f( segmentIdx, localIdx++, globalIdx,  args... ) )
                break;
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -205,11 +206,12 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
    {
       const IndexType storageSize = this->getStorageSize();
       const IndexType alignedSize = this->alignedSize;
-      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
-         const IndexType begin = i;
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType begin = segmentIdx;
          const IndexType end = storageSize;
-         for( IndexType j = begin; j < end; j += alignedSize )
-            if( ! f( i, j, args... ) )
+         IndexType localIdx( 0 );
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += alignedSize )
+            if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
                break;
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index c8e74ec59..d721edb00 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -225,8 +225,9 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
          const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
          const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
          const IndexType end = begin + segmentSize;
+         IndexType localIdx( 0 );
          for( IndexType globalIdx = begin; globalIdx < end; globalIdx++  )
-            if( ! f( segmentIdx, globalIdx, args... ) )
+            if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
                break;
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -239,8 +240,9 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
          const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
          const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
          const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
+         IndexType localIdx( 0 );
          for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize )
-            if( ! f( segmentIdx, globalIdx, args... ) )
+            if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
                break;
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index a877fd5c2..4a038eb2e 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -39,7 +39,7 @@ public:
    using RealAllocatorType = RealAllocator;
 
    Matrix( const RealAllocatorType& allocator = RealAllocatorType() );
-   
+
    Matrix( const IndexType rows,
            const IndexType columns,
            const RealAllocatorType& allocator = RealAllocatorType() );
@@ -100,9 +100,9 @@ public:
 
    virtual Real getElement( const IndexType row,
                             const IndexType column ) const = 0;
-   
+
    const ValuesVector& getValues() const;
-   
+
    ValuesVector& getValues();
 
    // TODO: parallelize and optimize for sparse matrices
@@ -137,7 +137,8 @@ public:
    __cuda_callable__
    Index getValuesSize() const;
 
-   protected:
+   // TODO: restore this
+   //protected:
 
    IndexType rows, columns, numberOfColors;
 
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 268fba6d3..b6a618e10 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -39,7 +39,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
       using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector;
       using ColumnsVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
-      
+
       // TODO: remove this - it is here only for compatibility with original matrix implementation
       typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
       typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
@@ -64,6 +64,9 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 
       void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
+
       IndexType getRowLength( const IndexType row ) const;
 
       __cuda_callable__
@@ -167,10 +170,16 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
       void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
-      
+
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
       void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
+
+      template< typename Function >
+      void forAllRows( Function& function ) const;
+
       template< typename Vector1, typename Vector2 >
       bool performSORIteration( const Vector1& b,
                                 const IndexType row,
@@ -201,7 +210,9 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 
       __cuda_callable__
       IndexType getPaddingIndex() const;
-   protected:
+
+// TODO: restore it and also in Matrix
+//   protected:
 
       ColumnsVectorType columnIndexes;
 
@@ -210,6 +221,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       IndexAllocator indexAlloctor;
 
       RealAllocator realAllocator;
+
+
 };
 
 }  // namespace Conatiners
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index e26693f6a..3605daaef 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -116,6 +116,32 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
    this->columnIndexes = this->getPaddingIndex();
 }
 
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Vector >
+void
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+getCompressedRowLengths( Vector& rowLengths ) const
+{
+   rowLengths.setSize( this->getRows() );
+   rowLengths = 0;
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   this->allRowsReduction( fetch, reduce, keep, 0 );
+}
+
 template< typename Real,
           template< typename, typename > class Segments,
           typename Device,
@@ -554,6 +580,43 @@ allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zer
    this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+forRows( IndexType first, IndexType last, Function& function ) const
+{
+   const auto columns_view = this->columnIndexes.getConstView();
+   const auto values_view = this->values.getConstView();
+   const IndexType paddingIndex_ = this->getPaddingIndex();
+   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
+      IndexType columnIdx = columns_view[ globalIdx ];
+      if( columnIdx != paddingIndex_ )
+         return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
+      return zero;
+   };
+   this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );
+
+}
+
+template< typename Real,
+          template< typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+forAllRows( Function& function ) const
+{
+   this->forRows( 0, this->getRows(), function );
+}
 
 /*template< typename Real,
           template< typename, typename > class Segments,
@@ -638,12 +701,31 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >&
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix )
 {
+   using RHSMatrixType = SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >;
    if( std::is_same< Device, Device2 >::value )
    {
-      
+      RowsCapacitiesType rowLengths;
+      matrix.getCompressedRowLengths( rowLengths );
+      this->setCompressedRowLengths( rowLengths );
+      // TODO: Replace this with SparseMatrixView
+      const auto matrix_columns_view = matrix.columnIndexes.getConstView();
+      const auto matrix_values_view = matrix.values.getConstView();
+      const auto segments_view = this->segments.getConstView();
+      auto this_columns_view = this->columnIndexes.getView();
+      auto this_values_view = this->values.getView();
+      const IndexType paddingIndex = this->getPaddingIndex();
+      auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) {
+         const IndexType column = matrix_columns_view[ globalIdx ];
+         if( column != paddingIndex )
+         {
+            const RealType value = matrix_values_view[ globalIdx ];
+            IndexType thisGlobalIdx = segments_view.getGlobalIdx( rowIdx, localIdx );
+            this_columns_view[ thisGlobalIdx ] = column;
+            this_values_view[ thisGlobalIdx ] = value;
+         }
+      };
+      matrix.forAllRows( f );
    }
-      
-   
 }
 
 template< typename Real,
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp
index b366f4e2f..07a60178f 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp
@@ -18,7 +18,7 @@
 #include <TNL/Matrices/AdEllpack.h>
 #include <TNL/Matrices/BiEllpack.h>
 
-#ifdef HAVE_GTEST 
+#ifdef HAVE_GTEST
 #include <gtest/gtest.h>
 
 template< typename MatrixHostFloat, typename MatrixHostInt >
@@ -36,7 +36,7 @@ void cuda_test_GetType()
     bool testRan = false;
     EXPECT_TRUE( testRan );
     std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
-    std::cerr << "This test has not been implemented properly yet.\n" << std::endl;    
+    std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
 }
 
 template< typename Matrix >
@@ -45,13 +45,13 @@ void test_SetDimensions()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
     const IndexType rows = 9;
     const IndexType cols = 8;
-    
+
     Matrix m;
     m.setDimensions( rows, cols );
-    
+
     EXPECT_EQ( m.getRows(), 9 );
     EXPECT_EQ( m.getColumns(), 8 );
 }
@@ -62,41 +62,41 @@ void test_SetCompressedRowLengths()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
     const IndexType rows = 10;
     const IndexType cols = 11;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
     typename Matrix::CompressedRowLengthsVector rowLengths;
     rowLengths.setSize( rows );
     rowLengths.setValue( 3 );
-    
+
     IndexType rowLength = 1;
     for( IndexType i = 2; i < rows; i++ )
         rowLengths.setElement( i, rowLength++ );
-    
+
     m.setCompressedRowLengths( rowLengths );
-    
+
     // Insert values into the rows.
     RealType value = 1;
-    
+
     for( IndexType i = 0; i < 3; i++ )      // 0th row
         m.setElement( 0, i, value++ );
-    
+
     for( IndexType i = 0; i < 3; i++ )      // 1st row
         m.setElement( 1, i, value++ );
-    
+
     for( IndexType i = 0; i < 1; i++ )      // 2nd row
         m.setElement( 2, i, value++ );
-    
+
     for( IndexType i = 0; i < 2; i++ )      // 3rd row
         m.setElement( 3, i, value++ );
-        
+
     for( IndexType i = 0; i < 3; i++ )      // 4th row
         m.setElement( 4, i, value++ );
-        
+
     for( IndexType i = 0; i < 4; i++ )      // 5th row
         m.setElement( 5, i, value++ );
 
@@ -111,8 +111,8 @@ void test_SetCompressedRowLengths()
 
     for( IndexType i = 0; i < 8; i++ )      // 9th row
         m.setElement( 9, i, value++ );
-    
-    
+
+
     EXPECT_EQ( m.getNonZeroRowLength( 0 ), 3 );
     EXPECT_EQ( m.getNonZeroRowLength( 1 ), 3 );
     EXPECT_EQ( m.getNonZeroRowLength( 2 ), 1 );
@@ -131,21 +131,21 @@ void test_SetLike()
     using RealType = typename Matrix1::RealType;
     using DeviceType = typename Matrix1::DeviceType;
     using IndexType = typename Matrix1::IndexType;
-        
+
     const IndexType rows = 8;
     const IndexType cols = 7;
-    
+
     Matrix1 m1;
     m1.reset();
     m1.setDimensions( rows + 1, cols + 2 );
-    
+
     Matrix2 m2;
     m2.reset();
     m2.setDimensions( rows, cols );
-    
+
     m1.setLike( m2 );
-    
-    
+
+
     EXPECT_EQ( m1.getRows(), m2.getRows() );
     EXPECT_EQ( m1.getColumns(), m2.getColumns() );
 }
@@ -174,7 +174,7 @@ void test_GetNumberOfNonzeroMatrixElements()
 
    const IndexType rows = 10;
    const IndexType cols = 10;
-    
+
    Matrix m;
    m.reset();
 
@@ -225,7 +225,7 @@ void test_Reset()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
 /*
  * Sets up the following 5x4 sparse matrix:
  *
@@ -235,16 +235,16 @@ void test_Reset()
  *    |  0  0  0  0 |
  *    \  0  0  0  0 /
  */
-    
+
     const IndexType rows = 5;
     const IndexType cols = 4;
-    
+
     Matrix m;
     m.setDimensions( rows, cols );
-    
+
     m.reset();
-    
-    
+
+
     EXPECT_EQ( m.getRows(), 0 );
     EXPECT_EQ( m.getColumns(), 0 );
 }
@@ -255,7 +255,7 @@ void test_SetElement()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
 /*
  * Sets up the following 10x10 sparse matrix:
  *
@@ -270,15 +270,15 @@ void test_SetElement()
  *    | 22 23 24 25 26 27 28 29 30 31  |
  *    \ 32 33 34 35 36 37 38 39 40 41 /
  */
-    
+
     const IndexType rows = 10;
     const IndexType cols = 10;
-    
+
     Matrix m;
     m.reset();
-    
+
     m.setDimensions( rows, cols );
-    
+
     typename Matrix::CompressedRowLengthsVector rowLengths;
     rowLengths.setSize( rows );
     rowLengths.setElement( 0, 4 );
@@ -292,29 +292,29 @@ void test_SetElement()
     rowLengths.setElement( 8, 10 );
     rowLengths.setElement( 9, 10 );
     m.setCompressedRowLengths( rowLengths );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < 4; i++ )
         m.setElement( 0, 2 * i, value++ );
-    
+
     for( IndexType i = 0; i < 3; i++ )
         m.setElement( 1, i, value++ );
-    
+
     for( IndexType i = 0; i < 8; i++ )
         m.setElement( 2, i, value++ );
-    
+
     for( IndexType i = 0; i < 2; i++ )
         m.setElement( 3, i, value++ );
-    
+
     for( IndexType i = 4; i < 8; i++ )
         m.setElement( i, 0, value++ );
-    
+
     for( IndexType j = 8; j < rows; j++)
     {
         for( IndexType i = 0; i < cols; i++ )
             m.setElement( j, i, value++ );
     }
-    
+
     EXPECT_EQ( m.getElement( 0, 0 ),  1 );
     EXPECT_EQ( m.getElement( 0, 1 ),  0 );
     EXPECT_EQ( m.getElement( 0, 2 ),  2 );
@@ -325,7 +325,7 @@ void test_SetElement()
     EXPECT_EQ( m.getElement( 0, 7 ),  0 );
     EXPECT_EQ( m.getElement( 0, 8 ),  0 );
     EXPECT_EQ( m.getElement( 0, 9 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ),  5 );
     EXPECT_EQ( m.getElement( 1, 1 ),  6 );
     EXPECT_EQ( m.getElement( 1, 2 ),  7 );
@@ -336,7 +336,7 @@ void test_SetElement()
     EXPECT_EQ( m.getElement( 1, 7 ),  0 );
     EXPECT_EQ( m.getElement( 1, 8 ),  0 );
     EXPECT_EQ( m.getElement( 1, 9 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ),  8 );
     EXPECT_EQ( m.getElement( 2, 1 ),  9 );
     EXPECT_EQ( m.getElement( 2, 2 ), 10 );
@@ -347,7 +347,7 @@ void test_SetElement()
     EXPECT_EQ( m.getElement( 2, 7 ), 15 );
     EXPECT_EQ( m.getElement( 2, 8 ),  0 );
     EXPECT_EQ( m.getElement( 2, 9 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 3, 0 ), 16 );
     EXPECT_EQ( m.getElement( 3, 1 ), 17 );
     EXPECT_EQ( m.getElement( 3, 2 ),  0 );
@@ -358,7 +358,7 @@ void test_SetElement()
     EXPECT_EQ( m.getElement( 3, 7 ),  0 );
     EXPECT_EQ( m.getElement( 3, 8 ),  0 );
     EXPECT_EQ( m.getElement( 3, 9 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 4, 0 ), 18 );
     EXPECT_EQ( m.getElement( 4, 1 ),  0 );
     EXPECT_EQ( m.getElement( 4, 2 ),  0 );
@@ -369,7 +369,7 @@ void test_SetElement()
     EXPECT_EQ( m.getElement( 4, 7 ),  0 );
     EXPECT_EQ( m.getElement( 4, 8 ),  0 );
     EXPECT_EQ( m.getElement( 4, 9 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 5, 0 ), 19 );
     EXPECT_EQ( m.getElement( 5, 1 ),  0 );
     EXPECT_EQ( m.getElement( 5, 2 ),  0 );
@@ -380,7 +380,7 @@ void test_SetElement()
     EXPECT_EQ( m.getElement( 5, 7 ),  0 );
     EXPECT_EQ( m.getElement( 5, 8 ),  0 );
     EXPECT_EQ( m.getElement( 5, 9 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 6, 0 ), 20 );
     EXPECT_EQ( m.getElement( 6, 1 ),  0 );
     EXPECT_EQ( m.getElement( 6, 2 ),  0 );
@@ -391,7 +391,7 @@ void test_SetElement()
     EXPECT_EQ( m.getElement( 6, 7 ),  0 );
     EXPECT_EQ( m.getElement( 6, 8 ),  0 );
     EXPECT_EQ( m.getElement( 6, 9 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 7, 0 ), 21 );
     EXPECT_EQ( m.getElement( 7, 1 ),  0 );
     EXPECT_EQ( m.getElement( 7, 2 ),  0 );
@@ -402,7 +402,7 @@ void test_SetElement()
     EXPECT_EQ( m.getElement( 7, 7 ),  0 );
     EXPECT_EQ( m.getElement( 7, 8 ),  0 );
     EXPECT_EQ( m.getElement( 7, 9 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 8, 0 ), 22 );
     EXPECT_EQ( m.getElement( 8, 1 ), 23 );
     EXPECT_EQ( m.getElement( 8, 2 ), 24 );
@@ -413,7 +413,7 @@ void test_SetElement()
     EXPECT_EQ( m.getElement( 8, 7 ), 29 );
     EXPECT_EQ( m.getElement( 8, 8 ), 30 );
     EXPECT_EQ( m.getElement( 8, 9 ), 31 );
-    
+
     EXPECT_EQ( m.getElement( 9, 0 ), 32 );
     EXPECT_EQ( m.getElement( 9, 1 ), 33 );
     EXPECT_EQ( m.getElement( 9, 2 ), 34 );
@@ -432,7 +432,7 @@ void test_AddElement()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
 /*
  * Sets up the following 6x5 sparse matrix:
  *
@@ -443,10 +443,10 @@ void test_AddElement()
  *    |  0 11  0  0  0 |
  *    \  0  0  0 12  0 /
  */
-    
+
     const IndexType rows = 6;
     const IndexType cols = 5;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
@@ -454,61 +454,61 @@ void test_AddElement()
     rowLengths.setSize( rows );
     rowLengths.setValue( 3 );
     m.setCompressedRowLengths( rowLengths );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < cols - 2; i++ )     // 0th row
         m.setElement( 0, i, value++ );
-    
+
     for( IndexType i = 1; i < cols - 1; i++ )     // 1st row
         m.setElement( 1, i, value++ );
-        
+
     for( IndexType i = 2; i < cols; i++ )         // 2nd row
         m.setElement( 2, i, value++ );
-        
+
     m.setElement( 3, 0, value++ );      // 3rd row
-     
+
     m.setElement( 4, 1, value++ );      // 4th row
- 
+
     m.setElement( 5, 3, value++ );      // 5th row
-    
-        
+
+
     // Check the set elements
     EXPECT_EQ( m.getElement( 0, 0 ),  1 );
     EXPECT_EQ( m.getElement( 0, 1 ),  2 );
     EXPECT_EQ( m.getElement( 0, 2 ),  3 );
     EXPECT_EQ( m.getElement( 0, 3 ),  0 );
     EXPECT_EQ( m.getElement( 0, 4 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ),  0 );
     EXPECT_EQ( m.getElement( 1, 1 ),  4 );
     EXPECT_EQ( m.getElement( 1, 2 ),  5 );
     EXPECT_EQ( m.getElement( 1, 3 ),  6 );
     EXPECT_EQ( m.getElement( 1, 4 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ),  0 );
     EXPECT_EQ( m.getElement( 2, 1 ),  0 );
     EXPECT_EQ( m.getElement( 2, 2 ),  7 );
     EXPECT_EQ( m.getElement( 2, 3 ),  8 );
     EXPECT_EQ( m.getElement( 2, 4 ),  9 );
-    
+
     EXPECT_EQ( m.getElement( 3, 0 ), 10 );
     EXPECT_EQ( m.getElement( 3, 1 ),  0 );
     EXPECT_EQ( m.getElement( 3, 2 ),  0 );
     EXPECT_EQ( m.getElement( 3, 3 ),  0 );
     EXPECT_EQ( m.getElement( 3, 4 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 4, 0 ),  0 );
     EXPECT_EQ( m.getElement( 4, 1 ), 11 );
     EXPECT_EQ( m.getElement( 4, 2 ),  0 );
     EXPECT_EQ( m.getElement( 4, 3 ),  0 );
     EXPECT_EQ( m.getElement( 4, 4 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 5, 0 ),  0 );
     EXPECT_EQ( m.getElement( 5, 1 ),  0 );
     EXPECT_EQ( m.getElement( 5, 2 ),  0 );
     EXPECT_EQ( m.getElement( 5, 3 ), 12 );
     EXPECT_EQ( m.getElement( 5, 4 ),  0 );
-    
+
     // Add new elements to the old elements with a multiplying factor applied to the old elements.
 
 /*
@@ -521,7 +521,7 @@ void test_AddElement()
  *    |  0 11  0  0  0 |
  *    \  0  0  0 12  0 /
  */
-    
+
 /*
  * The following setup results in the following 6x5 sparse matrix:
  *
@@ -532,57 +532,57 @@ void test_AddElement()
  *    |  0 35 14 15  0 |
  *    \  0  0 16 41 18 /
  */
-    
+
     RealType newValue = 1;
     for( IndexType i = 0; i < cols - 2; i++ )         // 0th row
         m.addElement( 0, i, newValue++, 2.0 );
-    
+
     for( IndexType i = 1; i < cols - 1; i++ )         // 1st row
         m.addElement( 1, i, newValue++, 2.0 );
-        
+
     for( IndexType i = 2; i < cols; i++ )             // 2nd row
         m.addElement( 2, i, newValue++, 2.0 );
-        
+
     for( IndexType i = 0; i < cols - 2; i++ )         // 3rd row
         m.addElement( 3, i, newValue++, 2.0 );
-    
+
     for( IndexType i = 1; i < cols - 1; i++ )         // 4th row
         m.addElement( 4, i, newValue++, 2.0 );
-    
+
     for( IndexType i = 2; i < cols; i++ )             // 5th row
         m.addElement( 5, i, newValue++, 2.0 );
-    
-    
+
+
     EXPECT_EQ( m.getElement( 0, 0 ),  3 );
     EXPECT_EQ( m.getElement( 0, 1 ),  6 );
     EXPECT_EQ( m.getElement( 0, 2 ),  9 );
     EXPECT_EQ( m.getElement( 0, 3 ),  0 );
     EXPECT_EQ( m.getElement( 0, 4 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ),  0 );
     EXPECT_EQ( m.getElement( 1, 1 ), 12 );
     EXPECT_EQ( m.getElement( 1, 2 ), 15 );
     EXPECT_EQ( m.getElement( 1, 3 ), 18 );
     EXPECT_EQ( m.getElement( 1, 4 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ),  0 );
     EXPECT_EQ( m.getElement( 2, 1 ),  0 );
     EXPECT_EQ( m.getElement( 2, 2 ), 21 );
     EXPECT_EQ( m.getElement( 2, 3 ), 24 );
     EXPECT_EQ( m.getElement( 2, 4 ), 27 );
-    
+
     EXPECT_EQ( m.getElement( 3, 0 ), 30 );
     EXPECT_EQ( m.getElement( 3, 1 ), 11 );
     EXPECT_EQ( m.getElement( 3, 2 ), 12 );
     EXPECT_EQ( m.getElement( 3, 3 ),  0 );
     EXPECT_EQ( m.getElement( 3, 4 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 4, 0 ),  0 );
     EXPECT_EQ( m.getElement( 4, 1 ), 35 );
     EXPECT_EQ( m.getElement( 4, 2 ), 14 );
     EXPECT_EQ( m.getElement( 4, 3 ), 15 );
     EXPECT_EQ( m.getElement( 4, 4 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 5, 0 ),  0 );
     EXPECT_EQ( m.getElement( 5, 1 ),  0 );
     EXPECT_EQ( m.getElement( 5, 2 ), 16 );
@@ -596,7 +596,7 @@ void test_SetRow()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
 /*
  * Sets up the following 3x7 sparse matrix:
  *
@@ -604,10 +604,10 @@ void test_SetRow()
  *    |  2  2  2  0  0  0  0 |
  *    \  3  3  3  0  0  0  0 /
  */
-    
+
     const IndexType rows = 3;
     const IndexType cols = 7;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
@@ -616,7 +616,7 @@ void test_SetRow()
     rowLengths.setValue( 6 );
     rowLengths.setElement( 1, 3 );
     m.setCompressedRowLengths( rowLengths );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < 3; i++ )
     {
@@ -624,19 +624,19 @@ void test_SetRow()
         m.setElement( 1, i, value + 1 );
         m.setElement( 2, i, value + 2 );
     }
-    
+
     RealType row1 [ 3 ] = { 11, 11, 11 }; IndexType colIndexes1 [ 3 ] = { 0, 1, 2 };
     RealType row2 [ 3 ] = { 22, 22, 22 }; IndexType colIndexes2 [ 3 ] = { 0, 1, 2 };
     RealType row3 [ 3 ] = { 33, 33, 33 }; IndexType colIndexes3 [ 3 ] = { 3, 4, 5 };
-    
+
     RealType row = 0;
     IndexType elements = 3;
-    
+
     m.setRow( row++, colIndexes1, row1, elements );
     m.setRow( row++, colIndexes2, row2, elements );
     m.setRow( row++, colIndexes3, row3, elements );
-    
-    
+
+
     EXPECT_EQ( m.getElement( 0, 0 ), 11 );
     EXPECT_EQ( m.getElement( 0, 1 ), 11 );
     EXPECT_EQ( m.getElement( 0, 2 ), 11 );
@@ -644,7 +644,7 @@ void test_SetRow()
     EXPECT_EQ( m.getElement( 0, 4 ),  0 );
     EXPECT_EQ( m.getElement( 0, 5 ),  0 );
     EXPECT_EQ( m.getElement( 0, 6 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ), 22 );
     EXPECT_EQ( m.getElement( 1, 1 ), 22 );
     EXPECT_EQ( m.getElement( 1, 2 ), 22 );
@@ -652,7 +652,7 @@ void test_SetRow()
     EXPECT_EQ( m.getElement( 1, 4 ),  0 );
     EXPECT_EQ( m.getElement( 1, 5 ),  0 );
     EXPECT_EQ( m.getElement( 1, 6 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ),  0 );
     EXPECT_EQ( m.getElement( 2, 1 ),  0 );
     EXPECT_EQ( m.getElement( 2, 2 ),  0 );
@@ -669,7 +669,7 @@ void test_VectorProduct()
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
     using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
-    
+
 /*
  * Sets up the following 4x4 sparse matrix:
  *
@@ -678,10 +678,10 @@ void test_VectorProduct()
  *    |  0  4  0  0 |
  *    \  0  0  5  0 /
  */
-    
+
     const IndexType m_rows_1 = 4;
     const IndexType m_cols_1 = 4;
-    
+
     Matrix m_1;
     m_1.reset();
     m_1.setDimensions( m_rows_1, m_cols_1 );
@@ -692,37 +692,37 @@ void test_VectorProduct()
     rowLengths_1.setElement( 2, 1 );
     rowLengths_1.setElement( 3, 1 );
     m_1.setCompressedRowLengths( rowLengths_1 );
-    
+
     RealType value_1 = 1;
     m_1.setElement( 0, 0, value_1++ );      // 0th row
-    
+
     m_1.setElement( 1, 1, value_1++ );      // 1st row
     m_1.setElement( 1, 3, value_1++ );
-        
+
     m_1.setElement( 2, 1, value_1++ );      // 2nd row
-        
+
     m_1.setElement( 3, 2, value_1++ );      // 3rd row
-    
+
     VectorType inVector_1;
     inVector_1.setSize( m_cols_1 );
-    for( IndexType i = 0; i < inVector_1.getSize(); i++ )        
+    for( IndexType i = 0; i < inVector_1.getSize(); i++ )
         inVector_1.setElement( i, 2 );
 
-    VectorType outVector_1;  
+    VectorType outVector_1;
     outVector_1.setSize( m_rows_1 );
     for( IndexType j = 0; j < outVector_1.getSize(); j++ )
         outVector_1.setElement( j, 0 );
- 
-    
+
+
     m_1.vectorProduct( inVector_1, outVector_1 );
-    
-   
+
+
     EXPECT_EQ( outVector_1.getElement( 0 ),  2 );
     EXPECT_EQ( outVector_1.getElement( 1 ), 10 );
     EXPECT_EQ( outVector_1.getElement( 2 ),  8 );
     EXPECT_EQ( outVector_1.getElement( 3 ), 10 );
-    
-    
+
+
 /*
  * Sets up the following 4x4 sparse matrix:
  *
@@ -731,10 +731,10 @@ void test_VectorProduct()
  *    |  5  6  7  0 |
  *    \  0  8  0  0 /
  */
-    
+
     const IndexType m_rows_2 = 4;
     const IndexType m_cols_2 = 4;
-    
+
     Matrix m_2;
     m_2.reset();
     m_2.setDimensions( m_rows_2, m_cols_2 );
@@ -744,39 +744,39 @@ void test_VectorProduct()
     rowLengths_2.setElement( 1, 1 );
     rowLengths_2.setElement( 3, 1 );
     m_2.setCompressedRowLengths( rowLengths_2 );
-    
+
     RealType value_2 = 1;
     for( IndexType i = 0; i < 3; i++ )   // 0th row
         m_2.setElement( 0, i, value_2++ );
-    
+
     m_2.setElement( 1, 3, value_2++ );      // 1st row
-        
+
     for( IndexType i = 0; i < 3; i++ )   // 2nd row
         m_2.setElement( 2, i, value_2++ );
-        
+
     for( IndexType i = 1; i < 2; i++ )       // 3rd row
         m_2.setElement( 3, i, value_2++ );
-    
+
     VectorType inVector_2;
     inVector_2.setSize( m_cols_2 );
-    for( IndexType i = 0; i < inVector_2.getSize(); i++ )        
+    for( IndexType i = 0; i < inVector_2.getSize(); i++ )
         inVector_2.setElement( i, 2 );
 
-    VectorType outVector_2;  
+    VectorType outVector_2;
     outVector_2.setSize( m_rows_2 );
     for( IndexType j = 0; j < outVector_2.getSize(); j++ )
         outVector_2.setElement( j, 0 );
- 
-    
+
+
     m_2.vectorProduct( inVector_2, outVector_2 );
-    
-   
+
+
     EXPECT_EQ( outVector_2.getElement( 0 ), 12 );
     EXPECT_EQ( outVector_2.getElement( 1 ),  8 );
     EXPECT_EQ( outVector_2.getElement( 2 ), 36 );
     EXPECT_EQ( outVector_2.getElement( 3 ), 16 );
-    
-    
+
+
 /*
  * Sets up the following 4x4 sparse matrix:
  *
@@ -785,10 +785,10 @@ void test_VectorProduct()
  *    |  7  8  9  0 |
  *    \  0 10 11 12 /
  */
-    
+
     const IndexType m_rows_3 = 4;
     const IndexType m_cols_3 = 4;
-    
+
     Matrix m_3;
     m_3.reset();
     m_3.setDimensions( m_rows_3, m_cols_3 );
@@ -796,40 +796,40 @@ void test_VectorProduct()
     rowLengths_3.setSize( m_rows_3 );
     rowLengths_3.setValue( 3 );
     m_3.setCompressedRowLengths( rowLengths_3 );
-    
+
     RealType value_3 = 1;
     for( IndexType i = 0; i < 3; i++ )          // 0th row
         m_3.setElement( 0, i, value_3++ );
-    
+
     for( IndexType i = 1; i < 4; i++ )
         m_3.setElement( 1, i, value_3++ );      // 1st row
-        
+
     for( IndexType i = 0; i < 3; i++ )          // 2nd row
         m_3.setElement( 2, i, value_3++ );
-        
+
     for( IndexType i = 1; i < 4; i++ )          // 3rd row
         m_3.setElement( 3, i, value_3++ );
-    
+
     VectorType inVector_3;
     inVector_3.setSize( m_cols_3 );
-    for( IndexType i = 0; i < inVector_3.getSize(); i++ )        
+    for( IndexType i = 0; i < inVector_3.getSize(); i++ )
         inVector_3.setElement( i, 2 );
 
-    VectorType outVector_3;  
+    VectorType outVector_3;
     outVector_3.setSize( m_rows_3 );
     for( IndexType j = 0; j < outVector_3.getSize(); j++ )
         outVector_3.setElement( j, 0 );
- 
-    
+
+
     m_3.vectorProduct( inVector_3, outVector_3 );
-    
-   
+
+
     EXPECT_EQ( outVector_3.getElement( 0 ), 12 );
     EXPECT_EQ( outVector_3.getElement( 1 ), 30 );
     EXPECT_EQ( outVector_3.getElement( 2 ), 48 );
     EXPECT_EQ( outVector_3.getElement( 3 ), 66 );
-    
-    
+
+
 /*
  * Sets up the following 8x8 sparse matrix:
  *
@@ -842,10 +842,10 @@ void test_VectorProduct()
  *    | 26 27 28 29 30  0  0  0 |
  *    \ 31 32 33 34 35  0  0  0 /
  */
-    
+
     const IndexType m_rows_4 = 8;
     const IndexType m_cols_4 = 8;
-    
+
     Matrix m_4;
     m_4.reset();
     m_4.setDimensions( m_rows_4, m_cols_4 );
@@ -856,48 +856,48 @@ void test_VectorProduct()
     rowLengths_4.setElement( 6, 5 );
     rowLengths_4.setElement( 7, 5 );
     m_4.setCompressedRowLengths( rowLengths_4 );
-    
+
     RealType value_4 = 1;
     for( IndexType i = 0; i < 3; i++ )       // 0th row
         m_4.setElement( 0, i, value_4++ );
-    
+
     m_4.setElement( 0, 5, value_4++ );
-    
+
     for( IndexType i = 1; i < 5; i++ )       // 1st row
         m_4.setElement( 1, i, value_4++ );
-    
+
     for( IndexType i = 0; i < 5; i++ )       // 2nd row
         m_4.setElement( 2, i, value_4++ );
-    
+
     for( IndexType i = 1; i < 5; i++ )       // 3rd row
         m_4.setElement( 3, i, value_4++ );
-    
+
     for( IndexType i = 2; i < 6; i++ )       // 4th row
         m_4.setElement( 4, i, value_4++ );
-    
+
     for( IndexType i = 3; i < 7; i++ )       // 5th row
         m_4.setElement( 5, i, value_4++ );
-    
+
     for( IndexType i = 0; i < 5; i++ )       // 6th row
         m_4.setElement( 6, i, value_4++ );
-    
+
     for( IndexType i = 0; i < 5; i++ )       // 7th row
         m_4.setElement( 7, i, value_4++ );
-    
+
     VectorType inVector_4;
     inVector_4.setSize( m_cols_4 );
-    for( IndexType i = 0; i < inVector_4.getSize(); i++ )        
+    for( IndexType i = 0; i < inVector_4.getSize(); i++ )
         inVector_4.setElement( i, 2 );
 
-    VectorType outVector_4;  
+    VectorType outVector_4;
     outVector_4.setSize( m_rows_4 );
     for( IndexType j = 0; j < outVector_4.getSize(); j++ )
         outVector_4.setElement( j, 0 );
-    
-    
+
+
     m_4.vectorProduct( inVector_4, outVector_4 );
-    
-   
+
+
     EXPECT_EQ( outVector_4.getElement( 0 ),  20 );
     EXPECT_EQ( outVector_4.getElement( 1 ),  52 );
     EXPECT_EQ( outVector_4.getElement( 2 ), 110 );
@@ -906,8 +906,8 @@ void test_VectorProduct()
     EXPECT_EQ( outVector_4.getElement( 5 ), 188 );
     EXPECT_EQ( outVector_4.getElement( 6 ), 280 );
     EXPECT_EQ( outVector_4.getElement( 7 ), 330 );
-    
-  
+
+
    /*
     * Sets up the following 8x8 sparse matrix:
     *
@@ -976,7 +976,7 @@ void test_VectorProduct()
     for( IndexType i = 0; i < inVector_5.getSize(); i++ )
         inVector_5.setElement( i, 2 );
 
-    VectorType outVector_5;  
+    VectorType outVector_5;
     outVector_5.setSize( m_rows_5 );
     for( IndexType j = 0; j < outVector_5.getSize(); j++ )
         outVector_5.setElement( j, 0 );
@@ -1077,6 +1077,8 @@ void test_RowsReduction()
    };
    m.allRowsReduction( fetch, reduce, keep, 0 );
    EXPECT_EQ( rowsCapacities, rowLengths );
+   m.getCompressedRowLengths( rowLengths );
+   EXPECT_EQ( rowsCapacities, rowLengths );
 
    ////
    // Compute max norm
@@ -1102,7 +1104,7 @@ void test_PerformSORIteration()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
 /*
  * Sets up the following 4x4 sparse matrix:
  *
@@ -1111,10 +1113,10 @@ void test_PerformSORIteration()
  *    |  0  1  4  1 |
  *    \  0  0  1  4 /
  */
-    
+
     const IndexType m_rows = 4;
     const IndexType m_cols = 4;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( m_rows, m_cols );
@@ -1122,54 +1124,54 @@ void test_PerformSORIteration()
     rowLengths.setSize( m_rows );
     rowLengths.setValue( 3 );
     m.setCompressedRowLengths( rowLengths );
-    
+
     m.setElement( 0, 0, 4.0 );        // 0th row
     m.setElement( 0, 1, 1.0);
-        
+
     m.setElement( 1, 0, 1.0 );        // 1st row
     m.setElement( 1, 1, 4.0 );
     m.setElement( 1, 2, 1.0 );
-        
+
     m.setElement( 2, 1, 1.0 );        // 2nd row
     m.setElement( 2, 2, 4.0 );
     m.setElement( 2, 3, 1.0 );
-        
+
     m.setElement( 3, 2, 1.0 );        // 3rd row
     m.setElement( 3, 3, 4.0 );
-    
+
     RealType bVector [ 4 ] = { 1, 1, 1, 1 };
     RealType xVector [ 4 ] = { 1, 1, 1, 1 };
-    
+
     IndexType row = 0;
     RealType omega = 1;
-    
-    
+
+
     m.performSORIteration( bVector, row++, xVector, omega);
-    
+
     EXPECT_EQ( xVector[ 0 ], 0.0 );
     EXPECT_EQ( xVector[ 1 ], 1.0 );
     EXPECT_EQ( xVector[ 2 ], 1.0 );
     EXPECT_EQ( xVector[ 3 ], 1.0 );
-    
-    
+
+
     m.performSORIteration( bVector, row++, xVector, omega);
-    
+
     EXPECT_EQ( xVector[ 0 ], 0.0 );
     EXPECT_EQ( xVector[ 1 ], 0.0 );
     EXPECT_EQ( xVector[ 2 ], 1.0 );
     EXPECT_EQ( xVector[ 3 ], 1.0 );
-    
-    
+
+
     m.performSORIteration( bVector, row++, xVector, omega);
-    
+
     EXPECT_EQ( xVector[ 0 ], 0.0 );
     EXPECT_EQ( xVector[ 1 ], 0.0 );
     EXPECT_EQ( xVector[ 2 ], 0.0 );
     EXPECT_EQ( xVector[ 3 ], 1.0 );
-    
-    
+
+
     m.performSORIteration( bVector, row++, xVector, omega);
-    
+
     EXPECT_EQ( xVector[ 0 ], 0.0 );
     EXPECT_EQ( xVector[ 1 ], 0.0 );
     EXPECT_EQ( xVector[ 2 ], 0.0 );
@@ -1183,7 +1185,7 @@ void test_OperatorEquals()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-   
+
    if( std::is_same< DeviceType, TNL::Devices::Cuda >::value )
        return;
    else
@@ -1229,33 +1231,33 @@ void test_OperatorEquals()
 
         m_host.setElement( 0, 4, value++ );           // 0th row
         m_host.setElement( 0, 5, value++ );
-        
+
         m_host.setElement( 1, 1, value++ );           // 1st row
         m_host.setElement( 1, 3, value++ );
 
         for( IndexType i = 1; i < 3; i++ )            // 2nd row
             m_host.setElement( 2, i, value++ );
-        
+
         m_host.setElement( 2, 4, value++ );           // 2nd row
 
-        
+
         for( IndexType i = 1; i < 5; i++ )            // 3rd row
             m_host.setElement( 3, i, value++ );
 
         m_host.setElement( 4, 1, value++ );           // 4th row
-        
+
         for( IndexType i = 1; i < 7; i++ )            // 5th row
             m_host.setElement( 5, i, value++ );
-        
+
         for( IndexType i = 0; i < 7; i++ )            // 6th row
             m_host.setElement( 6, i, value++ );
-        
+
         for( IndexType i = 0; i < 8; i++ )            // 7th row
             m_host.setElement( 7, i, value++ );
-        
+
         for( IndexType i = 0; i < 7; i++ )            // 1s at the end or rows: 5, 6
             m_host.setElement( i, 7, 1);
-        
+
         EXPECT_EQ( m_host.getElement( 0, 0 ),  1 );
         EXPECT_EQ( m_host.getElement( 0, 1 ),  2 );
         EXPECT_EQ( m_host.getElement( 0, 2 ),  3 );
@@ -1264,7 +1266,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 0, 5 ),  5 );
         EXPECT_EQ( m_host.getElement( 0, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 0, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 1, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 1, 1 ),  6 );
         EXPECT_EQ( m_host.getElement( 1, 2 ),  0 );
@@ -1273,7 +1275,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 1, 5 ),  0 );
         EXPECT_EQ( m_host.getElement( 1, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 1, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 2, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 2, 1 ),  8 );
         EXPECT_EQ( m_host.getElement( 2, 2 ),  9 );
@@ -1282,7 +1284,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 2, 5 ),  0 );
         EXPECT_EQ( m_host.getElement( 2, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 2, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 3, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 3, 1 ), 11 );
         EXPECT_EQ( m_host.getElement( 3, 2 ), 12 );
@@ -1291,7 +1293,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 3, 5 ),  0 );
         EXPECT_EQ( m_host.getElement( 3, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 3, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 4, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 4, 1 ), 15 );
         EXPECT_EQ( m_host.getElement( 4, 2 ),  0 );
@@ -1300,7 +1302,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 4, 5 ),  0 );
         EXPECT_EQ( m_host.getElement( 4, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 4, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 5, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 5, 1 ), 16 );
         EXPECT_EQ( m_host.getElement( 5, 2 ), 17 );
@@ -1309,7 +1311,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 5, 5 ), 20 );
         EXPECT_EQ( m_host.getElement( 5, 6 ), 21 );
         EXPECT_EQ( m_host.getElement( 5, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 6, 0 ), 22 );
         EXPECT_EQ( m_host.getElement( 6, 1 ), 23 );
         EXPECT_EQ( m_host.getElement( 6, 2 ), 24 );
@@ -1318,7 +1320,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 6, 5 ), 27 );
         EXPECT_EQ( m_host.getElement( 6, 6 ), 28 );
         EXPECT_EQ( m_host.getElement( 6, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 7, 0 ), 29 );
         EXPECT_EQ( m_host.getElement( 7, 1 ), 30 );
         EXPECT_EQ( m_host.getElement( 7, 2 ), 31 );
@@ -1348,7 +1350,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 0, 5 ),  5 );
         EXPECT_EQ( m_host.getElement( 0, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 0, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 1, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 1, 1 ),  6 );
         EXPECT_EQ( m_host.getElement( 1, 2 ),  0 );
@@ -1357,7 +1359,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 1, 5 ),  0 );
         EXPECT_EQ( m_host.getElement( 1, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 1, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 2, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 2, 1 ),  8 );
         EXPECT_EQ( m_host.getElement( 2, 2 ),  9 );
@@ -1366,7 +1368,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 2, 5 ),  0 );
         EXPECT_EQ( m_host.getElement( 2, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 2, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 3, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 3, 1 ), 11 );
         EXPECT_EQ( m_host.getElement( 3, 2 ), 12 );
@@ -1375,7 +1377,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 3, 5 ),  0 );
         EXPECT_EQ( m_host.getElement( 3, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 3, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 4, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 4, 1 ), 15 );
         EXPECT_EQ( m_host.getElement( 4, 2 ),  0 );
@@ -1384,7 +1386,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 4, 5 ),  0 );
         EXPECT_EQ( m_host.getElement( 4, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 4, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 5, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 5, 1 ), 16 );
         EXPECT_EQ( m_host.getElement( 5, 2 ), 17 );
@@ -1393,7 +1395,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 5, 5 ), 20 );
         EXPECT_EQ( m_host.getElement( 5, 6 ), 21 );
         EXPECT_EQ( m_host.getElement( 5, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 6, 0 ), 22 );
         EXPECT_EQ( m_host.getElement( 6, 1 ), 23 );
         EXPECT_EQ( m_host.getElement( 6, 2 ), 24 );
@@ -1402,7 +1404,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 6, 5 ), 27 );
         EXPECT_EQ( m_host.getElement( 6, 6 ), 28 );
         EXPECT_EQ( m_host.getElement( 6, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 7, 0 ), 29 );
         EXPECT_EQ( m_host.getElement( 7, 1 ), 30 );
         EXPECT_EQ( m_host.getElement( 7, 2 ), 31 );
@@ -1411,22 +1413,22 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 7, 5 ), 34 );
         EXPECT_EQ( m_host.getElement( 7, 6 ), 35 );
         EXPECT_EQ( m_host.getElement( 7, 7 ), 36 );
-        
+
         // Try vectorProduct with copied cuda matrix to see if it works correctly.
         using VectorType = TNL::Containers::Vector< RealType, TNL::Devices::Cuda, IndexType >;
-    
+
         VectorType inVector;
         inVector.setSize( m_cols );
-        for( IndexType i = 0; i < inVector.getSize(); i++ )        
+        for( IndexType i = 0; i < inVector.getSize(); i++ )
             inVector.setElement( i, 2 );
 
-        VectorType outVector;  
+        VectorType outVector;
         outVector.setSize( m_rows );
         for( IndexType j = 0; j < outVector.getSize(); j++ )
             outVector.setElement( j, 0 );
-        
+
         m_cuda.vectorProduct( inVector, outVector );
-        
+
         EXPECT_EQ( outVector.getElement( 0 ),  32 );
         EXPECT_EQ( outVector.getElement( 1 ),  28 );
         EXPECT_EQ( outVector.getElement( 2 ),  56 );
@@ -1444,7 +1446,7 @@ void test_SaveAndLoad( const char* filename )
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-    
+
    /*
     * Sets up the following 4x4 sparse matrix:
     *
@@ -1453,10 +1455,10 @@ void test_SaveAndLoad( const char* filename )
     *    |  6  7  8  0 |
     *    \  0  9 10 11 /
     */
-    
+
     const IndexType m_rows = 4;
     const IndexType m_cols = 4;
-    
+
     Matrix savedMatrix;
     savedMatrix.reset();
     savedMatrix.setDimensions( m_rows, m_cols );
@@ -1464,22 +1466,22 @@ void test_SaveAndLoad( const char* filename )
     rowLengths.setSize( m_rows );
     rowLengths.setValue( 3 );
     savedMatrix.setCompressedRowLengths( rowLengths );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
         savedMatrix.setElement( 0, i, value++ );
-        
+
     savedMatrix.setElement( 1, 1, value++ );
     savedMatrix.setElement( 1, 3, value++ );      // 1st row
-        
+
     for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
         savedMatrix.setElement( 2, i, value++ );
-        
+
     for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
         savedMatrix.setElement( 3, i, value++ );
-        
+
     ASSERT_NO_THROW( savedMatrix.save( filename ) );
-    
+
     Matrix loadedMatrix;
     loadedMatrix.reset();
     loadedMatrix.setDimensions( m_rows, m_cols );
@@ -1487,51 +1489,51 @@ void test_SaveAndLoad( const char* filename )
     rowLengths2.setSize( m_rows );
     rowLengths2.setValue( 3 );
     loadedMatrix.setCompressedRowLengths( rowLengths2 );
-    
-    
+
+
     ASSERT_NO_THROW( loadedMatrix.load( filename ) );
-    
-    
+
+
     EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
     EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
     EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
     EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
     EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
     EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
     EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
     EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
     EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
     EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
     EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
     EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
     EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
     EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
     EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
     EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  0 );
     EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  4 );
     EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  0 );
     EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  5 );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  6 );
     EXPECT_EQ( savedMatrix.getElement( 2, 1 ),  7 );
     EXPECT_EQ( savedMatrix.getElement( 2, 2 ),  8 );
     EXPECT_EQ( savedMatrix.getElement( 2, 3 ),  0 );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
     EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  9 );
     EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 );
     EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 );
-    
+
     EXPECT_EQ( std::remove( filename ), 0 );
 }
 
@@ -1541,7 +1543,7 @@ void test_Print()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
 /*
  * Sets up the following 5x4 sparse matrix:
  *
@@ -1551,10 +1553,10 @@ void test_Print()
  *    |  0  8  9 10 |
  *    \  0  0 11 12 /
  */
-    
+
     const IndexType m_rows = 5;
     const IndexType m_cols = 4;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( m_rows, m_cols );
@@ -1562,40 +1564,40 @@ void test_Print()
     rowLengths.setSize( m_rows );
     rowLengths.setValue( 3 );
     m.setCompressedRowLengths( rowLengths );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
         m.setElement( 0, i, value++ );
-    
+
     m.setElement( 1, 3, value++ );      // 1st row
-        
+
     for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
         m.setElement( 2, i, value++ );
-        
+
     for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
         m.setElement( 3, i, value++ );
-        
+
     for( IndexType i = 2; i < m_cols; i++ )       // 4th row
         m.setElement( 4, i, value++ );
-    
+
     #include <sstream>
     std::stringstream printed;
     std::stringstream couted;
-    
+
     //change the underlying buffer and save the old buffer
-    auto old_buf = std::cout.rdbuf(printed.rdbuf()); 
+    auto old_buf = std::cout.rdbuf(printed.rdbuf());
 
     m.print( std::cout ); //all the std::cout goes to ss
 
     std::cout.rdbuf(old_buf); //reset
-    
+
     couted << "Row: 0 ->  Col:0->1	 Col:1->2	 Col:2->3\t\n"
                "Row: 1 ->  Col:3->4\t\n"
                "Row: 2 ->  Col:0->5	 Col:1->6	 Col:2->7\t\n"
                "Row: 3 ->  Col:1->8	 Col:2->9	 Col:3->10\t\n"
                "Row: 4 ->  Col:2->11	 Col:3->12\t\n";
-    
-    
+
+
     EXPECT_EQ( printed.str(), couted.str() );
 }
 
-- 
GitLab


From 91d38ffa77b55f590c79578cbd0f02cb4c1dcad4 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 11 Dec 2019 18:12:36 +0100
Subject: [PATCH 029/179] Commenting out the code which cannot be compiled.

---
 src/TNL/Matrices/SparseMatrix.hpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 3605daaef..0d9ee0b06 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -594,13 +594,13 @@ forRows( IndexType first, IndexType last, Function& function ) const
    const auto columns_view = this->columnIndexes.getConstView();
    const auto values_view = this->values.getConstView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
-   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
+   /*auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
       IndexType columnIdx = columns_view[ globalIdx ];
       if( columnIdx != paddingIndex_ )
          return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
       return zero;
    };
-   this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );
+   this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );*/
 
 }
 
@@ -704,7 +704,7 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2
    using RHSMatrixType = SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >;
    if( std::is_same< Device, Device2 >::value )
    {
-      RowsCapacitiesType rowLengths;
+      /*RowsCapacitiesType rowLengths;
       matrix.getCompressedRowLengths( rowLengths );
       this->setCompressedRowLengths( rowLengths );
       // TODO: Replace this with SparseMatrixView
@@ -724,7 +724,7 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2
             this_values_view[ thisGlobalIdx ] = value;
          }
       };
-      matrix.forAllRows( f );
+      matrix.forAllRows( f );*/
    }
 }
 
-- 
GitLab


From 170d652a6fdd247efecacbac22bd6230f488af1b Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 11 Dec 2019 18:24:21 +0100
Subject: [PATCH 030/179] Adding CSRView.

---
 src/TNL/Containers/Segments/CSRView.h   | 105 +++++++++++
 src/TNL/Containers/Segments/CSRView.hpp | 221 ++++++++++++++++++++++++
 2 files changed, 326 insertions(+)
 create mode 100644 src/TNL/Containers/Segments/CSRView.h
 create mode 100644 src/TNL/Containers/Segments/CSRView.hpp

diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h
new file mode 100644
index 000000000..5eeb7ecb3
--- /dev/null
+++ b/src/TNL/Containers/Segments/CSRView.h
@@ -0,0 +1,105 @@
+/***************************************************************************
+                          CSRView.h -  description
+                             -------------------
+    begin                : Dec 11, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Device,
+          typename Index >
+class CSRView
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+      using OffsetsHolderView = typedef Containers::Vector< IndexType, DeviceType, IndexType >::ViewType;
+
+      __cuda_callable__
+      CSRView();
+
+      __cuda_callable__
+      CSRView( const OffsetsHolderView& offsets );
+
+      __cuda_callable__
+      CSRView( const CSRView& csr_view );
+
+      __cuda_callable__
+      CSRView( const CSRView&& csr_view );
+
+      /**
+       * \brief Number segments.
+       */
+      __cuda_callable__
+      IndexType getSegmentsCount() const;
+
+      /***
+       * \brief Returns size of the segment number \r segmentIdx
+       */
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      /***
+       * \brief Returns number of elements managed by all segments.
+       */
+      __cuda_callable__
+      IndexType getSize() const;
+
+      /***
+       * \brief Returns number of elements that needs to be allocated.
+       */
+      __cuda_callable__
+      IndexType getStorageSize() const;
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+   protected:
+
+      OffsetsHolderView offsets;
+};
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
+
+#include <TNL/Containers/Segments/CSRView.hpp>
diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
new file mode 100644
index 000000000..30ed24071
--- /dev/null
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -0,0 +1,221 @@
+/***************************************************************************
+                          CSRView.hpp -  description
+                             -------------------
+    begin                : Dec 11, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Containers/Segments/CSRView.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+CSRView< Device, Index >::
+CSRView()
+{
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+CSRView< Device, Index >::
+CSRView( const OffsetsHolderView& offsets_view )
+   : offsets( offsets_view )
+{
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+CSRView< Device, Index >::
+CSRView( const CSRView& csr_view )
+   : offsets( csr_view.offsest )
+{
+
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+CSRView< Device, Index >::
+CSRView( const CSRView&& csr_view )
+   : offsets( std::move( csr_view.offsest ) )
+{
+
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+Index
+CSRView< Device, Index >::
+getSegmentsCount() const
+{
+   return this->offsets.getSize() - 1;
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+Index
+CSRView< Device, Index >::
+getSegmentSize( const IndexType segmentIdx ) const
+{
+   if( ! std::is_same< DeviceType, Devices::Host >::value )
+   {
+#ifdef __CUDA_ARCH__
+      return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ];
+#else
+      return offsets.getElement( segmentIdx + 1 ) - offsets.getElement( segmentIdx );
+#endif
+   }
+   return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ];
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+Index
+CSRView< Device, Index >::
+getSize() const
+{
+   return this->getStorageSize();
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+Index
+CSRView< Device, Index >::
+getStorageSize() const
+{
+   if( ! std::is_same< DeviceType, Devices::Host >::value )
+   {
+#ifdef __CUDA_ARCH__
+      return offsets[ this->getSegmentsCount() ];
+#else
+      return offsets.getElement( this->getSegmentsCount() );
+#endif
+   }
+   return offsets[ this->getSegmentsCount() ];
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+Index
+CSRView< Device, Index >::
+getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
+{
+   if( ! std::is_same< DeviceType, Devices::Host >::value )
+   {
+#ifdef __CUDA_ARCH__
+      return offsets[ segmentIdx ] + localIdx;
+#else
+      return offsets.getElement( segmentIdx ) + localIdx;
+#endif
+   }
+   return offsets[ segmentIdx ] + localIdx;
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+void
+CSRView< Device, Index >::
+getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
+{
+}
+
+template< typename Device,
+          typename Index >
+   template< typename Function, typename... Args >
+void
+CSRView< Device, Index >::
+forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
+{
+   const auto offsetsView = this->offsets.getConstView();
+   auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+      const IndexType begin = offsetsView[ segmentIdx ];
+      const IndexType end = offsetsView[ segmentIdx + 1 ];
+      IndexType localIdx( 0 );
+      for( IndexType globalIdx = begin; globalIdx < end; globalIdx++  )
+         if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
+            break;
+   };
+   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+}
+
+template< typename Device,
+          typename Index >
+   template< typename Function, typename... Args >
+void
+CSRView< Device, Index >::
+forAll( Function& f, Args... args ) const
+{
+   this->forSegments( 0, this->getSize(), f, args... );
+}
+
+template< typename Device,
+          typename Index >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+CSRView< Device, Index >::
+segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   using RealType = decltype( fetch( IndexType(), IndexType() ) );
+   const auto offsetsView = this->offsets.getConstView();
+   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
+      const IndexType begin = offsetsView[ i ];
+      const IndexType end = offsetsView[ i + 1 ];
+      RealType aux( zero );
+      for( IndexType j = begin; j < end; j++  )
+         reduction( aux, fetch( i, j, args... ) );
+      keeper( i, aux );
+   };
+   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+}
+
+template< typename Device,
+          typename Index >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+CSRView< Device, Index >::
+allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
+}
+
+template< typename Device,
+          typename Index >
+void
+CSRView< Device, Index >::
+save( File& file ) const
+{
+   file << this->offsets;
+}
+
+template< typename Device,
+          typename Index >
+void
+CSRView< Device, Index >::
+load( File& file )
+{
+   file >> this->offsets;
+}
+
+      } // namespace Segments
+   }  // namespace Conatiners
+} // namespace TNL
-- 
GitLab


From 46e99b135c8ebc3bb23765261a6ba7f3ab2287c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 12 Dec 2019 21:44:39 +0100
Subject: [PATCH 031/179] Adding segments views.

---
 src/Benchmarks/SpMV/spmv.h                    |   8 +-
 src/TNL/Containers/Segments/CSR.h             |  14 +-
 src/TNL/Containers/Segments/CSR.hpp           | 111 ++++--
 src/TNL/Containers/Segments/CSRView.h         |  18 +-
 src/TNL/Containers/Segments/CSRView.hpp       |  33 +-
 src/TNL/Containers/Segments/Ellpack.h         |   9 +
 src/TNL/Containers/Segments/Ellpack.hpp       |  80 +++-
 src/TNL/Containers/Segments/EllpackView.h     | 111 ++++++
 src/TNL/Containers/Segments/EllpackView.hpp   | 293 +++++++++++++++
 src/TNL/Containers/Segments/SlicedEllpack.h   |  10 +-
 src/TNL/Containers/Segments/SlicedEllpack.hpp |  77 +++-
 .../Containers/Segments/SlicedEllpackView.h   | 116 ++++++
 .../Containers/Segments/SlicedEllpackView.hpp | 342 ++++++++++++++++++
 src/TNL/Containers/Segments/details/CSR.h     |  89 +++++
 src/TNL/Containers/Segments/details/Ellpack.h | 107 ++++++
 .../Segments/details/SlicedEllpack.h          | 106 ++++++
 src/TNL/Matrices/SparseMatrix.h               |  12 +-
 src/TNL/Matrices/SparseMatrix.hpp             |  90 ++---
 .../Containers/Segments/SegmentsTest.hpp      |  25 ++
 .../SparseMatrixTest_Ellpack_segments.h       |   8 +-
 .../SparseMatrixTest_SlicedEllpack_segments.h |   8 +-
 21 files changed, 1521 insertions(+), 146 deletions(-)
 create mode 100644 src/TNL/Containers/Segments/EllpackView.h
 create mode 100644 src/TNL/Containers/Segments/EllpackView.hpp
 create mode 100644 src/TNL/Containers/Segments/SlicedEllpackView.h
 create mode 100644 src/TNL/Containers/Segments/SlicedEllpackView.hpp
 create mode 100644 src/TNL/Containers/Segments/details/CSR.h
 create mode 100644 src/TNL/Containers/Segments/details/Ellpack.h
 create mode 100644 src/TNL/Containers/Segments/details/SlicedEllpack.h

diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index a6acb52fd..66f4fb236 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -45,14 +45,14 @@ using SlicedEllpackAlias = Matrices::SlicedEllpack< Real, Device, Index >;
 template< typename Real, typename Device, typename Index >
 using SparseMatrix_CSR = Matrices::SparseMatrix< Real, Containers::Segments::CSR, Device, Index >;
 
-template< typename Device, typename Index >
-using EllpackSegments = Containers::Segments::Ellpack< Device, Index >;
+template< typename Device, typename Index, typename IndexAllocator >
+using EllpackSegments = Containers::Segments::Ellpack< Device, Index, IndexAllocator >;
 
 template< typename Real, typename Device, typename Index >
 using SparseMatrix_Ellpack = Matrices::SparseMatrix< Real, EllpackSegments, Device, Index >;
 
-template< typename Device, typename Index >
-using SlicedEllpackSegments = Containers::Segments::SlicedEllpack< Device, Index >;
+template< typename Device, typename Index, typename IndexAllocator >
+using SlicedEllpackSegments = Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >;
 
 template< typename Real, typename Device, typename Index >
 using SparseMatrix_SlicedEllpack = Matrices::SparseMatrix< Real, SlicedEllpackSegments, Device, Index >;
diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h
index ecd1de983..b83e43f1d 100644
--- a/src/TNL/Containers/Segments/CSR.h
+++ b/src/TNL/Containers/Segments/CSR.h
@@ -10,22 +10,28 @@
 
 #pragma once
 
+#include <type_traits>
+
 #include <TNL/Containers/Vector.h>
+#include <TNL/Containers/Segments/CSRView.h>
 
 namespace TNL {
    namespace Containers {
       namespace Segments {
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > >
 class CSR
 {
    public:
 
       using DeviceType = Device;
       using IndexType = Index;
-      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
+      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type, IndexAllocator >;
       using SegmentsSizes = OffsetsHolder;
+      using ViewType = CSRView< Device, Index >;
+      using ConstViewType = CSRView< Device, std::add_const_t< Index > >;
 
       CSR();
 
@@ -41,6 +47,10 @@ class CSR
       template< typename SizesHolder = OffsetsHolder >
       void setSegmentsSizes( const SizesHolder& sizes );
 
+      ViewType getView();
+
+      ConstViewType getConstView() const;
+
       /**
        * \brief Number segments.
        */
diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index ccb483125..a8f12e7dc 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -13,6 +13,7 @@
 #include <TNL/Containers/Vector.h>
 #include <TNL/Algorithms/ParallelFor.h>
 #include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Containers/Segments/details/CSR.h>
 
 namespace TNL {
    namespace Containers {
@@ -20,64 +21,92 @@ namespace TNL {
 
 
 template< typename Device,
-          typename Index >
-CSR< Device, Index >::
+          typename Index,
+          typename IndexAllocator >
+CSR< Device, Index, IndexAllocator >::
 CSR()
 {
 }
 
 template< typename Device,
-          typename Index >
-CSR< Device, Index >::
+          typename Index,
+          typename IndexAllocator >
+CSR< Device, Index, IndexAllocator >::
 CSR( const SegmentsSizes& segmentsSizes )
 {
    this->setSegmentsSizes( segmentsSizes );
 }
 
 template< typename Device,
-          typename Index >
-CSR< Device, Index >::
+          typename Index,
+          typename IndexAllocator >
+CSR< Device, Index, IndexAllocator >::
 CSR( const CSR& csr ) : offsets( csr.offsets )
 {
 }
 
 template< typename Device,
-          typename Index >
-CSR< Device, Index >::
+          typename Index,
+          typename IndexAllocator >
+CSR< Device, Index, IndexAllocator >::
 CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) )
 {
 
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename IndexAllocator >
    template< typename SizesHolder >
 void
-CSR< Device, Index >::
+CSR< Device, Index, IndexAllocator >::
 setSegmentsSizes( const SizesHolder& sizes )
 {
-   this->offsets.setSize( sizes.getSize() + 1 );
+   details::CSR< Device, Index >::setSegmentsSizes( sizes, this->offsets );
+   /*this->offsets.setSize( sizes.getSize() + 1 );
    auto view = this->offsets.getView( 0, sizes.getSize() );
    view = sizes;
    this->offsets.setElement( sizes.getSize(), 0 );
-   this->offsets.template scan< Algorithms::ScanType::Exclusive >();
+   this->offsets.template scan< Algorithms::ScanType::Exclusive >();*/
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename IndexAllocator >
+typename CSR< Device, Index, IndexAllocator >::ViewType
+CSR< Device, Index, IndexAllocator >::
+getView()
+{
+   return ViewType( this->offsets.getView() );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+typename CSR< Device, Index, IndexAllocator >::ConstViewType
+CSR< Device, Index, IndexAllocator >::
+getConstView() const
+{
+   return ConstViewType( this->offsets.getConstView() );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
 __cuda_callable__
 Index
-CSR< Device, Index >::
+CSR< Device, Index, IndexAllocator >::
 getSegmentsCount() const
 {
    return this->offsets.getSize() - 1;
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename IndexAllocator >
 __cuda_callable__
 Index
-CSR< Device, Index >::
+CSR< Device, Index, IndexAllocator >::
 getSegmentSize( const IndexType segmentIdx ) const
 {
    if( ! std::is_same< DeviceType, Devices::Host >::value )
@@ -92,20 +121,22 @@ getSegmentSize( const IndexType segmentIdx ) const
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename IndexAllocator >
 __cuda_callable__
 Index
-CSR< Device, Index >::
+CSR< Device, Index, IndexAllocator >::
 getSize() const
 {
    return this->getStorageSize();
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename IndexAllocator >
 __cuda_callable__
 Index
-CSR< Device, Index >::
+CSR< Device, Index, IndexAllocator >::
 getStorageSize() const
 {
    if( ! std::is_same< DeviceType, Devices::Host >::value )
@@ -120,10 +151,11 @@ getStorageSize() const
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename IndexAllocator >
 __cuda_callable__
 Index
-CSR< Device, Index >::
+CSR< Device, Index, IndexAllocator >::
 getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
 {
    if( ! std::is_same< DeviceType, Devices::Host >::value )
@@ -138,19 +170,21 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename IndexAllocator >
 __cuda_callable__
 void
-CSR< Device, Index >::
+CSR< Device, Index, IndexAllocator >::
 getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
 {
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename IndexAllocator >
    template< typename Function, typename... Args >
 void
-CSR< Device, Index >::
+CSR< Device, Index, IndexAllocator >::
 forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 {
    const auto offsetsView = this->offsets.getConstView();
@@ -166,20 +200,22 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename IndexAllocator>
    template< typename Function, typename... Args >
 void
-CSR< Device, Index >::
+CSR< Device, Index, IndexAllocator >::
 forAll( Function& f, Args... args ) const
 {
    this->forSegments( 0, this->getSize(), f, args... );
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename IndexAllocator >
    template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
 void
-CSR< Device, Index >::
+CSR< Device, Index, IndexAllocator >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
    using RealType = decltype( fetch( IndexType(), IndexType() ) );
@@ -196,28 +232,31 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename IndexAllocator >
    template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
 void
-CSR< Device, Index >::
+CSR< Device, Index, IndexAllocator >::
 allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
    this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename IndexAllocator >
 void
-CSR< Device, Index >::
+CSR< Device, Index, IndexAllocator >::
 save( File& file ) const
 {
    file << this->offsets;
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename IndexAllocator >
 void
-CSR< Device, Index >::
+CSR< Device, Index, IndexAllocator >::
 load( File& file )
 {
    file >> this->offsets;
diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h
index 5eeb7ecb3..2f8957970 100644
--- a/src/TNL/Containers/Segments/CSRView.h
+++ b/src/TNL/Containers/Segments/CSRView.h
@@ -10,6 +10,8 @@
 
 #pragma once
 
+#include <type_traits>
+
 #include <TNL/Containers/Vector.h>
 
 namespace TNL {
@@ -24,13 +26,19 @@ class CSRView
 
       using DeviceType = Device;
       using IndexType = Index;
-      using OffsetsHolderView = typedef Containers::Vector< IndexType, DeviceType, IndexType >::ViewType;
+      using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, IndexType >;
+      using ConstOffsetsView = typename Containers::Vector< IndexType, DeviceType, IndexType >::ConstViewType;
+      using ViewType = CSRView;
+      using ConstViewType = CSRView< Device, std::add_const_t< Index > >;
 
       __cuda_callable__
       CSRView();
 
       __cuda_callable__
-      CSRView( const OffsetsHolderView& offsets );
+      CSRView( const OffsetsView&& offsets );
+
+      __cuda_callable__
+      CSRView( const ConstOffsetsView&& offsets );
 
       __cuda_callable__
       CSRView( const CSRView& csr_view );
@@ -38,6 +46,10 @@ class CSRView
       __cuda_callable__
       CSRView( const CSRView&& csr_view );
 
+      ViewType getView();
+
+      ConstViewType getConstView() const;
+
       /**
        * \brief Number segments.
        */
@@ -96,7 +108,7 @@ class CSRView
 
    protected:
 
-      OffsetsHolderView offsets;
+      OffsetsView offsets;
 };
       } // namespace Segements
    }  // namespace Conatiners
diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
index 30ed24071..f50a74985 100644
--- a/src/TNL/Containers/Segments/CSRView.hpp
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -31,7 +31,7 @@ template< typename Device,
           typename Index >
 __cuda_callable__
 CSRView< Device, Index >::
-CSRView( const OffsetsHolderView& offsets_view )
+CSRView( const OffsetsView&& offsets_view )
    : offsets( offsets_view )
 {
 }
@@ -40,10 +40,18 @@ template< typename Device,
           typename Index >
 __cuda_callable__
 CSRView< Device, Index >::
-CSRView( const CSRView& csr_view )
-   : offsets( csr_view.offsest )
+CSRView( const ConstOffsetsView&& offsets_view )
+   : offsets( offsets_view )
 {
+}
 
+template< typename Device,
+          typename Index >
+__cuda_callable__
+CSRView< Device, Index >::
+CSRView( const CSRView& csr_view )
+   : offsets( csr_view.offsets )
+{
 }
 
 template< typename Device,
@@ -51,9 +59,26 @@ template< typename Device,
 __cuda_callable__
 CSRView< Device, Index >::
 CSRView( const CSRView&& csr_view )
-   : offsets( std::move( csr_view.offsest ) )
+   : offsets( std::move( csr_view.offsets ) )
+{
+}
+
+template< typename Device,
+          typename Index >
+typename CSRView< Device, Index >::ViewType
+CSRView< Device, Index >::
+getView()
 {
+   return ViewType( this->offsets );
+}
 
+template< typename Device,
+          typename Index >
+typename CSRView< Device, Index >::ConstViewType
+CSRView< Device, Index >::
+getConstView() const
+{
+   return ConstViewType( this->offsets.getConstView() );
 }
 
 template< typename Device,
diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h
index b08ad0f04..9c81a8428 100644
--- a/src/TNL/Containers/Segments/Ellpack.h
+++ b/src/TNL/Containers/Segments/Ellpack.h
@@ -11,6 +11,7 @@
 #pragma once
 
 #include <TNL/Containers/Vector.h>
+#include <TNL/Containers/Segments/EllpackView.h>
 
 namespace TNL {
    namespace Containers {
@@ -18,6 +19,7 @@ namespace TNL {
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >,
           bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
           int Alignment = 32 >
 class Ellpack
@@ -30,6 +32,9 @@ class Ellpack
       static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
       using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
       using SegmentsSizes = OffsetsHolder;
+      using ViewType = EllpackView< Device, Index, RowMajorOrder, Alignment >;
+      //using ConstViewType = EllpackView< Device, std::add_const_t< Index >, RowMajorOrder, Alignment >;
+
 
       Ellpack();
 
@@ -41,6 +46,10 @@ class Ellpack
 
       Ellpack( const Ellpack&& segments );
 
+      ViewType getView();
+
+      //ConstViewType getConstView() const;
+
       /**
        * \brief Set sizes of particular segments.
        */
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index 337009e99..482c87d4f 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -21,9 +21,10 @@ namespace TNL {
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 Ellpack()
    : segmentSize( 0 ), size( 0 ), alignedSize( 0 )
 {
@@ -31,9 +32,10 @@ Ellpack()
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 Ellpack( const SegmentsSizes& segmentsSizes )
    : segmentSize( 0 ), size( 0 ), alignedSize( 0 )
 {
@@ -42,9 +44,10 @@ Ellpack( const SegmentsSizes& segmentsSizes )
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 Ellpack( const IndexType segmentsCount, const IndexType segmentSize )
    : segmentSize( 0 ), size( 0 ), alignedSize( 0 )
 {
@@ -53,9 +56,10 @@ Ellpack( const IndexType segmentsCount, const IndexType segmentSize )
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 Ellpack( const Ellpack& ellpack )
    : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize )
 {
@@ -63,9 +67,10 @@ Ellpack( const Ellpack& ellpack )
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 Ellpack( const Ellpack&& ellpack )
    : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize )
 {
@@ -73,11 +78,35 @@ Ellpack( const Ellpack&& ellpack )
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+typename Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::ViewType
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+getView()
+{
+   return ViewType( segmentSize, size, alignedSize );
+}
+
+/*template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+typename Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::ConstViewType
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+getConstView() const
+{
+   return ConstViewType( segmentSize, size, alignedSize );
+}*/
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
    template< typename SizesHolder >
 void
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 setSegmentsSizes( const SizesHolder& sizes )
 {
    this->segmentSize = max( sizes );
@@ -90,10 +119,11 @@ setSegmentsSizes( const SizesHolder& sizes )
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
 void
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize )
 {
    this->segmentSize = segmentSize;
@@ -107,11 +137,12 @@ setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize )
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
 __cuda_callable__
 Index
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 getSegmentsCount() const
 {
    return this->size;
@@ -119,11 +150,12 @@ getSegmentsCount() const
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
 __cuda_callable__
 Index
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 getSegmentSize( const IndexType segmentIdx ) const
 {
    return this->segmentSize;
@@ -131,11 +163,12 @@ getSegmentSize( const IndexType segmentIdx ) const
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
 __cuda_callable__
 Index
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 getSize() const
 {
    return this->size * this->segmentSize;
@@ -144,11 +177,12 @@ getSize() const
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
 __cuda_callable__
 Index
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 getStorageSize() const
 {
    return this->alignedSize * this->segmentSize;
@@ -156,11 +190,12 @@ getStorageSize() const
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
 __cuda_callable__
 Index
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
 {
    if( RowMajorOrder )
@@ -171,22 +206,24 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
 __cuda_callable__
 void
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
 {
 }
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
    template< typename Function, typename... Args >
 void
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 {
    if( RowMajorOrder )
@@ -220,11 +257,12 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
    template< typename Function, typename... Args >
 void
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 forAll( Function& f, Args... args ) const
 {
    this->forSegments( 0, this->getSize(), f, args... );
@@ -232,11 +270,12 @@ forAll( Function& f, Args... args ) const
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
    template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
 void
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
    if( RowMajorOrder )
@@ -272,11 +311,12 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
    template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
 void
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
    this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
@@ -284,10 +324,11 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
 void
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 save( File& file ) const
 {
    file.save( &segmentSize );
@@ -297,10 +338,11 @@ save( File& file ) const
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int Alignment >
 void
-Ellpack< Device, Index, RowMajorOrder, Alignment >::
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 load( File& file )
 {
    file.load( &segmentSize );
diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h
new file mode 100644
index 000000000..adbfee629
--- /dev/null
+++ b/src/TNL/Containers/Segments/EllpackView.h
@@ -0,0 +1,111 @@
+/***************************************************************************
+                          EllpackView.h -  description
+                             -------------------
+    begin                : Dec 12, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <type_traits>
+
+#include <TNL/Containers/Vector.h>
+
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          int Alignment = 32 >
+class EllpackView
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+      static constexpr int getAlignment() { return Alignment; }
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
+      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
+      using SegmentsSizes = OffsetsHolder;
+      using ViewType = EllpackView;
+      //using ConstViewType = EllpackView< Device, std::add_const_t< Index > >;
+
+      __cuda_callable__
+      EllpackView();
+
+      __cuda_callable__
+      EllpackView( IndexType segmentSize, IndexType size, IndexType alignedSize );
+
+      __cuda_callable__
+      EllpackView( const EllpackView& ellpackView );
+
+      __cuda_callable__
+      EllpackView( const EllpackView&& ellpackView );
+
+      ViewType getView();
+
+      //ConstViewType getConstView() const;
+
+      /**
+       * \brief Number segments.
+       */
+      __cuda_callable__
+      IndexType getSegmentsCount() const;
+
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      __cuda_callable__
+      IndexType getSize() const;
+
+      __cuda_callable__
+      IndexType getStorageSize() const;
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+   protected:
+
+      IndexType segmentSize, size, alignedSize;
+};
+
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
+
+#include <TNL/Containers/Segments/EllpackView.hpp>
diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp
new file mode 100644
index 000000000..d124633ff
--- /dev/null
+++ b/src/TNL/Containers/Segments/EllpackView.hpp
@@ -0,0 +1,293 @@
+/***************************************************************************
+                          EllpackView.hpp -  description
+                             -------------------
+    begin                : Dec 12, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Containers/Segments/EllpackView.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+EllpackView()
+   : segmentSize( 0 ), size( 0 ), alignedSize( 0 )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+EllpackView( IndexType segmentSize, IndexType size, IndexType alignedSize )
+   : segmentSize( segmentSize ), size( size ), alignedSize( alignedSize )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+EllpackView( const EllpackView& ellpack )
+   : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+EllpackView( const EllpackView&& ellpack )
+   : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+typename EllpackView< Device, Index, RowMajorOrder, Alignment >::ViewType
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getView()
+{
+   return ViewType( segmentSize, size, alignedSize );
+}
+
+/*template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+typename EllpackView< Device, Index, RowMajorOrder, Alignment >::ConstViewType
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getConstView() const
+{
+   return ConstViewType( segmentSize, size, alignedSize );
+}*/
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+Index
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getSegmentsCount() const
+{
+   return this->size;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+Index
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getSegmentSize( const IndexType segmentIdx ) const
+{
+   return this->segmentSize;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+Index
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getSize() const
+{
+   return this->size * this->segmentSize;
+}
+
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+Index
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getStorageSize() const
+{
+   return this->alignedSize * this->segmentSize;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+Index
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
+{
+   if( RowMajorOrder )
+      return segmentIdx * this->segmentSize + localIdx;
+   else
+      return segmentIdx + this->alignedSize * localIdx;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+void
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+   template< typename Function, typename... Args >
+void
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
+{
+   if( RowMajorOrder )
+   {
+      const IndexType segmentSize = this->segmentSize;
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType begin = segmentIdx * segmentSize;
+         const IndexType end = begin + segmentSize;
+         IndexType localIdx( 0 );
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx++  )
+            if( ! f( segmentIdx, localIdx++, globalIdx,  args... ) )
+               break;
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+   else
+   {
+      const IndexType storageSize = this->getStorageSize();
+      const IndexType alignedSize = this->alignedSize;
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType begin = segmentIdx;
+         const IndexType end = storageSize;
+         IndexType localIdx( 0 );
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += alignedSize )
+            if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
+               break;
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+   template< typename Function, typename... Args >
+void
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+forAll( Function& f, Args... args ) const
+{
+   this->forSegments( 0, this->getSize(), f, args... );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   if( RowMajorOrder )
+   {
+      using RealType = decltype( fetch( IndexType(), IndexType() ) );
+      const IndexType segmentSize = this->segmentSize;
+      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
+         const IndexType begin = i * segmentSize;
+         const IndexType end = begin + segmentSize;
+         RealType aux( zero );
+         for( IndexType j = begin; j < end; j++  )
+            reduction( aux, fetch( i, j, args... ) );
+         keeper( i, aux );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+   else
+   {
+      using RealType = decltype( fetch( IndexType(), IndexType() ) );
+      const IndexType storageSize = this->getStorageSize();
+      const IndexType alignedSize = this->alignedSize;
+      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
+         const IndexType begin = i;
+         const IndexType end = storageSize;
+         RealType aux( zero );
+         for( IndexType j = begin; j < end; j += alignedSize  )
+            reduction( aux, fetch( i, j, args... ) );
+         keeper( i, aux );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+void
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+save( File& file ) const
+{
+   file.save( &segmentSize );
+   file.save( &size );
+   file.save( &alignedSize );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+void
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+load( File& file )
+{
+   file.load( &segmentSize );
+   file.load( &size );
+   file.load( &alignedSize );
+}
+
+      } // namespace Segments
+   }  // namespace Conatiners
+} // namespace TNL
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h
index ecc2c8c7e..fc514c51f 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.h
+++ b/src/TNL/Containers/Segments/SlicedEllpack.h
@@ -11,6 +11,7 @@
 #pragma once
 
 #include <TNL/Containers/Vector.h>
+#include <TNL/Containers/Segments/SlicedEllpackView.h>
 
 namespace TNL {
    namespace Containers {
@@ -18,6 +19,7 @@ namespace TNL {
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >,
           bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
           int SliceSize = 32 >
 class SlicedEllpack
@@ -26,9 +28,11 @@ class SlicedEllpack
 
       using DeviceType = Device;
       using IndexType = Index;
-      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
+      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type, IndexAllocator >;
       static constexpr int getSliceSize() { return SliceSize; }
       static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
+      using ViewType = SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >;
+      using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >;
 
       SlicedEllpack();
 
@@ -38,6 +42,10 @@ class SlicedEllpack
 
       SlicedEllpack( const SlicedEllpack&& segments );
 
+      ViewType getView();
+
+      ConstViewType getConstView() const;
+
       /**
        * \brief Set sizes of particular segments.
        */
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index d721edb00..bdf28ff73 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -22,9 +22,10 @@ namespace TNL {
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int SliceSize >
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 SlicedEllpack()
    : size( 0 ), alignedSize( 0 ), segmentsCount( 0 )
 {
@@ -32,9 +33,10 @@ SlicedEllpack()
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int SliceSize >
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes )
    : size( 0 ), alignedSize( 0 ), segmentsCount( 0 )
 {
@@ -43,9 +45,10 @@ SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes )
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int SliceSize >
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 SlicedEllpack( const SlicedEllpack& slicedEllpack )
    : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ),
      segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ),
@@ -55,9 +58,10 @@ SlicedEllpack( const SlicedEllpack& slicedEllpack )
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int SliceSize >
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 SlicedEllpack( const SlicedEllpack&& slicedEllpack )
    : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ),
      segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ),
@@ -67,11 +71,36 @@ SlicedEllpack( const SlicedEllpack&& slicedEllpack )
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+typename SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::ViewType
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getView()
+{
+   return ViewType( size, alignedSize, segmentsCount, sliceOffsets.getView(), sliceSegmentSizes.getView() );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+typename SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::ConstViewType
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getConstView() const
+{
+   return ConstViewType( size, alignedSize, segmentsCount, sliceOffsets.getConstView(), sliceSegmentSizes.getConstView() );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int SliceSize >
    template< typename SizesHolder >
 void
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 setSegmentsSizes( const SizesHolder& sizes )
 {
    this->segmentsCount = sizes.getSize();
@@ -79,7 +108,7 @@ setSegmentsSizes( const SizesHolder& sizes )
    this->sliceOffsets.setSize( slicesCount + 1 );
    this->sliceOffsets = 0;
    this->sliceSegmentSizes.setSize( slicesCount );
-   Ellpack< DeviceType, IndexType, true > ellpack;
+   Ellpack< DeviceType, IndexType, IndexAllocator, true > ellpack;
    ellpack.setSegmentsSizes( slicesCount, SliceSize );
 
    const IndexType _size = sizes.getSize();
@@ -106,11 +135,12 @@ setSegmentsSizes( const SizesHolder& sizes )
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int SliceSize >
 __cuda_callable__
 Index
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 getSegmentsCount() const
 {
    return this->segmentsCount;
@@ -118,11 +148,12 @@ getSegmentsCount() const
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int SliceSize >
 __cuda_callable__
 Index
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 getSegmentSize( const IndexType segmentIdx ) const
 {
    const Index sliceIdx = segmentIdx / SliceSize;
@@ -140,11 +171,12 @@ getSegmentSize( const IndexType segmentIdx ) const
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int SliceSize >
 __cuda_callable__
 Index
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 getSize() const
 {
    return this->size;
@@ -152,11 +184,12 @@ getSize() const
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int SliceSize >
 __cuda_callable__
 Index
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 getStorageSize() const
 {
    return this->alignedSize;
@@ -164,11 +197,12 @@ getStorageSize() const
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int SliceSize >
 __cuda_callable__
 Index
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
 {
    const IndexType sliceIdx = segmentIdx / SliceSize;
@@ -197,22 +231,24 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int SliceSize >
 __cuda_callable__
 void
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
 {
 }
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int SliceSize >
    template< typename Function, typename... Args >
 void
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 {
    const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
@@ -251,11 +287,12 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int SliceSize >
    template< typename Function, typename... Args >
 void
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 forAll( Function& f, Args... args ) const
 {
    this->forSegments( 0, this->getSegmentsCount(), f, args... );
@@ -263,11 +300,12 @@ forAll( Function& f, Args... args ) const
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int SliceSize >
    template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
 void
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
    using RealType = decltype( fetch( IndexType(), IndexType() ) );
@@ -307,11 +345,12 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int SliceSize >
    template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
 void
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
    this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
@@ -319,10 +358,11 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int SliceSize >
 void
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 save( File& file ) const
 {
    file.save( &size );
@@ -334,10 +374,11 @@ save( File& file ) const
 
 template< typename Device,
           typename Index,
+          typename IndexAllocator,
           bool RowMajorOrder,
           int SliceSize >
 void
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 load( File& file )
 {
    file.load( &size );
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h
new file mode 100644
index 000000000..275baacf5
--- /dev/null
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.h
@@ -0,0 +1,116 @@
+/***************************************************************************
+                          SlicedEllpackView.h -  description
+                             -------------------
+    begin                : Dec 12, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <type_traits>
+
+#include <TNL/Containers/Vector.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          int SliceSize = 32 >
+class SlicedEllpackView
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+      using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, typename std::remove_const < IndexType >::type >;
+      static constexpr int getSliceSize() { return SliceSize; }
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
+      using ViewType = SlicedEllpackView;
+      using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index > >;
+
+      __cuda_callable__
+      SlicedEllpackView();
+
+      __cuda_callable__
+      SlicedEllpackView( IndexType size,
+                         IndexType alignedSize,
+                         IndexType segmentsCount,
+                         OffsetsView&& sliceOffsets,
+                         OffsetsView&& sliceSegmentSizes );
+
+      __cuda_callable__
+      SlicedEllpackView( const SlicedEllpackView& slicedEllpackView );
+
+      __cuda_callable__
+      SlicedEllpackView( const SlicedEllpackView&& slicedEllpackView );
+
+      ViewType getView();
+
+      ConstViewType getConstView() const;
+
+      __cuda_callable__
+      IndexType getSegmentsCount() const;
+
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      /**
+       * \brief Number segments.
+       */
+      __cuda_callable__
+      IndexType getSize() const;
+
+
+      __cuda_callable__
+      IndexType getStorageSize() const;
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+   protected:
+
+      IndexType size, alignedSize, segmentsCount;
+
+      OffsetsView sliceOffsets, sliceSegmentSizes;
+};
+
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
+
+#include <TNL/Containers/Segments/SlicedEllpackView.hpp>
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
new file mode 100644
index 000000000..f2e03bd38
--- /dev/null
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
@@ -0,0 +1,342 @@
+/***************************************************************************
+                          SlicedEllpackView.hpp -  description
+                             -------------------
+    begin                : Dec 4, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Containers/Segments/SlicedEllpackView.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpackView()
+   : size( 0 ), alignedSize( 0 ), segmentsCount( 0 )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpackView(  IndexType size,
+                    IndexType alignedSize,
+                    IndexType segmentsCount,
+                    OffsetsView&& sliceOffsets,
+                    OffsetsView&& sliceSegmentSizes )
+   : size( size ), alignedSize( alignedSize ), segmentsCount( segmentsCount ),
+     sliceOffsets( std::forward< OffsetsView >( sliceOffsets ) ), sliceSegmentSizes( std::forward< OffsetsView >( sliceSegmentSizes ) )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpackView( const SlicedEllpackView& slicedEllpackView )
+   : size( slicedEllpackView.size ), alignedSize( slicedEllpackView.alignedSize ),
+     segmentsCount( slicedEllpackView.segmentsCount ), sliceOffsets( slicedEllpackView.sliceOffsets ),
+     sliceSegmentSizes( slicedEllpackView.sliceSegmentSizes )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpackView( const SlicedEllpackView&& slicedEllpackView )
+   : size( slicedEllpackView.size ), alignedSize( slicedEllpackView.alignedSize ),
+     segmentsCount( slicedEllpackView.segmentsCount ), sliceOffsets( slicedEllpackView.sliceOffsets ),
+     sliceSegmentSizes( slicedEllpackView.sliceSegmentSizes )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+typename SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::ViewType
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getView()
+{
+   return ViewType( size, alignedSize, segmentsCount, sliceOffsets, sliceSegmentSizes );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+typename SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::ConstViewType
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getConstView() const
+{
+   return ConstViewType( size, alignedSize, segmentsCount, sliceOffsets.getConstView(), sliceSegmentSizes.getConstView() );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSegmentsCount() const
+{
+   return this->segmentsCount;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSegmentSize( const IndexType segmentIdx ) const
+{
+   const Index sliceIdx = segmentIdx / SliceSize;
+   if( std::is_same< DeviceType, Devices::Host >::value )
+      return this->sliceSegmentSizes[ sliceIdx ];
+   else
+   {
+#ifdef __CUDA_ARCH__
+   return this->sliceSegmentSizes[ sliceIdx ];
+#else
+   return this->sliceSegmentSizes.getElement( sliceIdx );
+#endif
+   }
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSize() const
+{
+   return this->size;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getStorageSize() const
+{
+   return this->alignedSize;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
+{
+   const IndexType sliceIdx = segmentIdx / SliceSize;
+   const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+   IndexType sliceOffset, segmentSize;
+   if( std::is_same< DeviceType, Devices::Host >::value )
+   {
+      sliceOffset = this->sliceOffsets[ sliceIdx ];
+      segmentSize = this->sliceSegmentSizes[ sliceIdx ];
+   }
+   else
+   {
+#ifdef __CUDA__ARCH__
+      sliceOffset = this->sliceOffsets[ sliceIdx ];
+      segmentSize = this->sliceSegmentSizes[ sliceIdx ];
+#else
+      sliceOffset = this->sliceOffsets.getElement( sliceIdx );
+      segmentSize = this->sliceSegmentSizes.getElement( sliceIdx );
+#endif
+   }
+   if( RowMajorOrder )
+      return sliceOffset + segmentInSliceIdx * segmentSize + localIdx;
+   else
+      return sliceOffset + segmentInSliceIdx + SliceSize * localIdx;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Function, typename... Args >
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
+{
+   const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
+   const auto sliceOffsets_view = this->sliceOffsets.getConstView();
+   if( RowMajorOrder )
+   {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) {
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
+         const IndexType end = begin + segmentSize;
+         IndexType localIdx( 0 );
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx++  )
+            if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
+               break;
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+   else
+   {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) {
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
+         const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
+         IndexType localIdx( 0 );
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize )
+            if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
+               break;
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Function, typename... Args >
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+forAll( Function& f, Args... args ) const
+{
+   this->forSegments( 0, this->getSegmentsCount(), f, args... );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   using RealType = decltype( fetch( IndexType(), IndexType() ) );
+   const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
+   const auto sliceOffsets_view = this->sliceOffsets.getConstView();
+   if( RowMajorOrder )
+   {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
+         const IndexType end = begin + segmentSize;
+         RealType aux( zero );
+         for( IndexType globalIdx = begin; globalIdx< end; globalIdx++  )
+            reduction( aux, fetch( segmentIdx, globalIdx, args... ) );
+         keeper( segmentIdx, aux );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+   else
+   {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
+         const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
+         RealType aux( zero );
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize  )
+            reduction( aux, fetch( segmentIdx, globalIdx, args... ) );
+         keeper( segmentIdx, aux );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+save( File& file ) const
+{
+   file.save( &size );
+   file.save( &alignedSize );
+   file.save( &segmentsCount );
+   file << this->sliceOffsets;
+   file << this->sliceSegmentSizes;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+load( File& file )
+{
+   file.load( &size );
+   file.load( &alignedSize );
+   file.load( &segmentsCount );
+   file >> this->sliceOffsets;
+   file >> this->sliceSegmentSizes;
+}
+
+      } // namespace Segments
+   }  // namespace Conatiners
+} // namespace TNL
diff --git a/src/TNL/Containers/Segments/details/CSR.h b/src/TNL/Containers/Segments/details/CSR.h
new file mode 100644
index 000000000..47e768d28
--- /dev/null
+++ b/src/TNL/Containers/Segments/details/CSR.h
@@ -0,0 +1,89 @@
+/***************************************************************************
+                          CSR.h -  description
+                             -------------------
+    begin                : Dec 12, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+         namespace details {
+
+template< typename Device,
+          typename Index >
+class CSR
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+
+      template< typename SizesHolder, typename CSROffsets >
+      static void setSegmentsSizes( const SizesHolder& sizes, CSROffsets& offsets )
+      {
+         offsets.setSize( sizes.getSize() + 1 );
+         auto view = offsets.getView( 0, sizes.getSize() );
+         view = sizes;
+         offsets.setElement( sizes.getSize(), 0 );
+         offsets.template scan< Algorithms::ScanType::Exclusive >();
+      }
+
+      /***
+       * \brief Returns size of the segment number \r segmentIdx
+       */
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      /***
+       * \brief Returns number of elements managed by all segments.
+       */
+      __cuda_callable__
+      IndexType getSize() const;
+
+      /***
+       * \brief Returns number of elements that needs to be allocated.
+       */
+      __cuda_callable__
+      IndexType getStorageSize() const;
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+};
+         } // namespace details
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
+
+#include <TNL/Containers/Segments/CSR.hpp>
diff --git a/src/TNL/Containers/Segments/details/Ellpack.h b/src/TNL/Containers/Segments/details/Ellpack.h
new file mode 100644
index 000000000..b08ad0f04
--- /dev/null
+++ b/src/TNL/Containers/Segments/details/Ellpack.h
@@ -0,0 +1,107 @@
+/***************************************************************************
+                          Ellpack.h -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          int Alignment = 32 >
+class Ellpack
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+      static constexpr int getAlignment() { return Alignment; }
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
+      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
+      using SegmentsSizes = OffsetsHolder;
+
+      Ellpack();
+
+      Ellpack( const SegmentsSizes& sizes );
+
+      Ellpack( const IndexType segmentsCount, const IndexType segmentSize );
+
+      Ellpack( const Ellpack& segments );
+
+      Ellpack( const Ellpack&& segments );
+
+      /**
+       * \brief Set sizes of particular segments.
+       */
+      template< typename SizesHolder = OffsetsHolder >
+      void setSegmentsSizes( const SizesHolder& sizes );
+
+      void setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize );
+      /**
+       * \brief Number segments.
+       */
+      __cuda_callable__
+      IndexType getSegmentsCount() const;
+
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      __cuda_callable__
+      IndexType getSize() const;
+
+      __cuda_callable__
+      IndexType getStorageSize() const;
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+   protected:
+
+      IndexType segmentSize, size, alignedSize;
+};
+
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
+
+#include <TNL/Containers/Segments/Ellpack.hpp>
diff --git a/src/TNL/Containers/Segments/details/SlicedEllpack.h b/src/TNL/Containers/Segments/details/SlicedEllpack.h
new file mode 100644
index 000000000..ecc2c8c7e
--- /dev/null
+++ b/src/TNL/Containers/Segments/details/SlicedEllpack.h
@@ -0,0 +1,106 @@
+/***************************************************************************
+                          SlicedEllpack.h -  description
+                             -------------------
+    begin                : Dec 4, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          int SliceSize = 32 >
+class SlicedEllpack
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
+      static constexpr int getSliceSize() { return SliceSize; }
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
+
+      SlicedEllpack();
+
+      SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes );
+
+      SlicedEllpack( const SlicedEllpack& segments );
+
+      SlicedEllpack( const SlicedEllpack&& segments );
+
+      /**
+       * \brief Set sizes of particular segments.
+       */
+      template< typename SizesHolder = OffsetsHolder >
+      void setSegmentsSizes( const SizesHolder& sizes );
+
+      __cuda_callable__
+      IndexType getSegmentsCount() const;
+
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      /**
+       * \brief Number segments.
+       */
+      __cuda_callable__
+      IndexType getSize() const;
+
+
+      __cuda_callable__
+      IndexType getStorageSize() const;
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+   protected:
+
+      IndexType size, alignedSize, segmentsCount;
+
+      OffsetsHolder sliceOffsets, sliceSegmentSizes;
+};
+
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
+
+#include <TNL/Containers/Segments/SlicedEllpack.hpp>
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index b6a618e10..1512f8574 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -17,7 +17,7 @@ namespace TNL {
 namespace Matrices {
 
 template< typename Real,
-          template< typename Device_, typename Index_ > class Segments,
+          template< typename Device_, typename Index_, typename IndexAllocator_ > class Segments,
           typename Device = Devices::Host,
           typename Index = int,
           typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >,
@@ -27,9 +27,9 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
    public:
 
       using RealType = Real;
-      template< typename Device_, typename Index_ >
-      using SegmentsTemplate = Segments< Device_, Index_ >;
-      using SegmentsType = Segments< Device, Index >;
+      template< typename Device_, typename Index_, typename IndexAllocator_ >
+      using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >;
+      using SegmentsType = Segments< Device, Index, IndexAllocator >;
       using DeviceType = Device;
       using IndexType = Index;
       using RealAllocatorType = RealAllocator;
@@ -77,7 +77,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       __cuda_callable__
       IndexType getNonZeroRowLengthFast( const IndexType row ) const;
 
-      template< typename Real2, template< typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 >
+      template< typename Real2, template< typename, typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 >
       void setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix );
 
       IndexType getNumberOfNonzeroMatrixElements() const;
@@ -191,7 +191,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 
       // cross-device copy assignment
       template< typename Real2,
-                template< typename, typename > class Segments2,
+                template< typename, typename, typename > class Segments2,
                 typename Device2,
                 typename Index2,
                 typename RealAllocator2,
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 0d9ee0b06..e24ed2f44 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -18,7 +18,7 @@ namespace TNL {
 namespace Matrices {
 
    template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -31,7 +31,7 @@ SparseMatrix( const RealAllocatorType& realAllocator,
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -43,7 +43,7 @@ SparseMatrix( const SparseMatrix& m )
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -55,7 +55,7 @@ SparseMatrix( const SparseMatrix&& m )
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -70,7 +70,7 @@ SparseMatrix( const IndexType rows,
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -86,7 +86,7 @@ getSerializationType()
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -99,7 +99,7 @@ getSerializationTypeVirtual() const
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -117,7 +117,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -143,7 +143,7 @@ getCompressedRowLengths( Vector& rowLengths ) const
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -156,7 +156,7 @@ getRowLength( const IndexType row ) const
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -170,7 +170,7 @@ getRowLengthFast( const IndexType row ) const
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -183,7 +183,7 @@ getNonZeroRowLength( const IndexType row ) const
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -197,12 +197,12 @@ getNonZeroRowLengthFast( const IndexType row ) const
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
           typename IndexAllocator >
-   template< typename Real2, template< typename, typename > class Segments2,  typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 >
+   template< typename Real2, template< typename, typename, typename > class Segments2,  typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 >
 void
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix )
@@ -211,7 +211,7 @@ setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2,
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -229,7 +229,7 @@ getNumberOfNonzeroMatrixElements() const
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -244,7 +244,7 @@ reset()
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -260,7 +260,7 @@ setElementFast( const IndexType row,
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -275,7 +275,7 @@ setElement( const IndexType row,
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -292,7 +292,7 @@ addElementFast( const IndexType row,
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -358,7 +358,7 @@ addElement( const IndexType row,
 
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -374,7 +374,7 @@ setRowFast( const IndexType row,
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -403,7 +403,7 @@ setRow( const IndexType row,
 
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -421,7 +421,7 @@ addRowFast( const IndexType row,
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -439,7 +439,7 @@ addRow( const IndexType row,
 
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -454,7 +454,7 @@ getElementFast( const IndexType row,
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -477,7 +477,7 @@ getElement( const IndexType row,
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -493,7 +493,7 @@ getRowFast( const IndexType row,
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -509,7 +509,7 @@ rowVectorProduct( const IndexType row,
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -544,7 +544,7 @@ vectorProduct( const InVector& inVector,
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -567,7 +567,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -581,7 +581,7 @@ allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zer
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -605,7 +605,7 @@ forRows( IndexType first, IndexType last, Function& function ) const
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -619,7 +619,7 @@ forAllRows( Function& function ) const
 }
 
 /*template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -635,7 +635,7 @@ addMatrix( const SparseMatrix< Real2, Segments2, Device, Index2, RealAllocator2,
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -650,7 +650,7 @@ getTransposition( const SparseMatrix< Real2, Device, Index2 >& matrix,
 }*/
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -668,7 +668,7 @@ performSORIteration( const Vector1& b,
 
 // copy assignment
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -686,13 +686,13 @@ operator=( const SparseMatrix& matrix )
 
 // cross-device copy assignment
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
           typename IndexAllocator >
    template< typename Real2,
-             template< typename, typename > class Segments2,
+             template< typename, typename, typename > class Segments2,
              typename Device2,
              typename Index2,
              typename RealAllocator2,
@@ -729,7 +729,7 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -744,7 +744,7 @@ save( File& file ) const
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -759,7 +759,7 @@ load( File& file )
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -772,7 +772,7 @@ save( const String& fileName ) const
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -785,7 +785,7 @@ load( const String& fileName )
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
@@ -811,7 +811,7 @@ print( std::ostream& str ) const
 }
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
           typename RealAllocator,
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
index acc75655f..5e74f96b0 100644
--- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp
+++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
@@ -52,6 +52,16 @@ void test_SetSegmentsSizes_EqualSizes()
 
    for( IndexType i = 0; i < segmentsCount; i++ )
       EXPECT_EQ( segments3.getSegmentSize( i ), segmentSize );
+
+   using SegmentsView = typename Segments::ViewType;
+
+   SegmentsView segmentsView = segments.getView();
+   EXPECT_EQ( segmentsView.getSegmentsCount(), segmentsCount );
+   EXPECT_EQ( segmentsView.getSize(), segmentsCount * segmentSize );
+   EXPECT_LE( segmentsView.getSize(), segments.getStorageSize() );
+
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( segmentsView.getSegmentSize( i ), segmentSize );
 }
 
 template< typename Segments >
@@ -89,6 +99,16 @@ void test_SetSegmentsSizes_EqualSizes_EllpackOnly()
 
    for( IndexType i = 0; i < segmentsCount; i++ )
       EXPECT_EQ( segments3.getSegmentSize( i ), segmentSize );
+
+   using SegmentsView = typename Segments::ViewType;
+
+   SegmentsView segmentsView = segments.getView();
+   EXPECT_EQ( segmentsView.getSegmentsCount(), segmentsCount );
+   EXPECT_EQ( segmentsView.getSize(), segmentsCount * segmentSize );
+   EXPECT_LE( segmentsView.getSize(), segments.getStorageSize() );
+
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( segmentsView.getSegmentSize( i ), segmentSize );
 }
 
 template< typename Segments >
@@ -136,6 +156,11 @@ void test_AllReduction_MaximumInSegments()
 
    for( IndexType i = 0; i < segmentsCount; i++ )
       EXPECT_EQ( result.getElement( i ), ( i + 1 ) * segmentSize );
+
+   result_view = 0;
+   segments.getView().allReduction( fetch, reduce, keep, std::numeric_limits< IndexType >::min() );
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( result.getElement( i ), ( i + 1 ) * segmentSize );
 }
 
 #endif
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
index 2c0514c0a..16c22d9ca 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
@@ -28,14 +28,14 @@ protected:
 
 ////
 // Row-major format is used for the host system
-template< typename Device, typename Index >
-using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, true, 32 >;
+template< typename Device, typename Index, typename IndexAlocator >
+using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAlocator, true, 32 >;
 
 
 ////
 // Column-major format is used for GPUs
-template< typename Device, typename Index >
-using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, false, 32 >;
+template< typename Device, typename Index, typename IndexAllocator >
+using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator, false, 32 >;
 
 // types for which MatrixTest is instantiated
 using EllpackMatrixTypes = ::testing::Types
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
index 5efcb1eae..8597121e4 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
@@ -28,14 +28,14 @@ protected:
 
 ////
 // Row-major format is used for the host system
-template< typename Device, typename Index >
-using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, true, 32 >;
+template< typename Device, typename Index, typename IndexAllocator >
+using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, true, 32 >;
 
 
 ////
 // Column-major format is used for GPUs
-template< typename Device, typename Index >
-using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, false, 32 >;
+template< typename Device, typename Index, typename IndexAllocator >
+using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, false, 32 >;
 
 // types for which MatrixTest is instantiated
 using SlicedEllpackMatrixTypes = ::testing::Types
-- 
GitLab


From 482a96529145bae8d6902544ac0fc834f4fa7cc3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 12 Dec 2019 22:15:49 +0100
Subject: [PATCH 032/179] Refactoring segments.

---
 src/TNL/Containers/Segments/CSR.hpp           | 25 +----------
 src/TNL/Containers/Segments/CSRView.hpp       | 21 ++--------
 src/TNL/Containers/Segments/details/CSR.h     | 41 +++++++++++++++----
 src/TNL/Containers/Segments/details/Ellpack.h |  2 -
 .../Segments/details/SlicedEllpack.h          |  2 -
 5 files changed, 37 insertions(+), 54 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index a8f12e7dc..280ed6ebf 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -63,11 +63,6 @@ CSR< Device, Index, IndexAllocator >::
 setSegmentsSizes( const SizesHolder& sizes )
 {
    details::CSR< Device, Index >::setSegmentsSizes( sizes, this->offsets );
-   /*this->offsets.setSize( sizes.getSize() + 1 );
-   auto view = this->offsets.getView( 0, sizes.getSize() );
-   view = sizes;
-   this->offsets.setElement( sizes.getSize(), 0 );
-   this->offsets.template scan< Algorithms::ScanType::Exclusive >();*/
 }
 
 template< typename Device,
@@ -109,15 +104,7 @@ Index
 CSR< Device, Index, IndexAllocator >::
 getSegmentSize( const IndexType segmentIdx ) const
 {
-   if( ! std::is_same< DeviceType, Devices::Host >::value )
-   {
-#ifdef __CUDA_ARCH__
-      return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ];
-#else
-      return offsets.getElement( segmentIdx + 1 ) - offsets.getElement( segmentIdx );
-#endif
-   }
-   return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ];
+   return details::CSR< Device, Index >::getSegmentSize( this->offsets, segmentIdx );
 }
 
 template< typename Device,
@@ -139,15 +126,7 @@ Index
 CSR< Device, Index, IndexAllocator >::
 getStorageSize() const
 {
-   if( ! std::is_same< DeviceType, Devices::Host >::value )
-   {
-#ifdef __CUDA_ARCH__
-      return offsets[ this->getSegmentsCount() ];
-#else
-      return offsets.getElement( this->getSegmentsCount() );
-#endif
-   }
-   return offsets[ this->getSegmentsCount() ];
+   return details::CSR< Device, Index >::getStorageSize( this->offsets );
 }
 
 template< typename Device,
diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
index f50a74985..dd4c434ba 100644
--- a/src/TNL/Containers/Segments/CSRView.hpp
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -13,6 +13,7 @@
 #include <TNL/Containers/Vector.h>
 #include <TNL/Algorithms/ParallelFor.h>
 #include <TNL/Containers/Segments/CSRView.h>
+#include <TNL/Containers/Segments/details/CSR.h>
 
 namespace TNL {
    namespace Containers {
@@ -98,15 +99,7 @@ Index
 CSRView< Device, Index >::
 getSegmentSize( const IndexType segmentIdx ) const
 {
-   if( ! std::is_same< DeviceType, Devices::Host >::value )
-   {
-#ifdef __CUDA_ARCH__
-      return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ];
-#else
-      return offsets.getElement( segmentIdx + 1 ) - offsets.getElement( segmentIdx );
-#endif
-   }
-   return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ];
+   return details::CSR< Device, Index >::getSegmentSize( this->offsets, segmentIdx );
 }
 
 template< typename Device,
@@ -126,15 +119,7 @@ Index
 CSRView< Device, Index >::
 getStorageSize() const
 {
-   if( ! std::is_same< DeviceType, Devices::Host >::value )
-   {
-#ifdef __CUDA_ARCH__
-      return offsets[ this->getSegmentsCount() ];
-#else
-      return offsets.getElement( this->getSegmentsCount() );
-#endif
-   }
-   return offsets[ this->getSegmentsCount() ];
+   return details::CSR< Device, Index >::getStorageSize( this->offsets );
 }
 
 template< typename Device,
diff --git a/src/TNL/Containers/Segments/details/CSR.h b/src/TNL/Containers/Segments/details/CSR.h
index 47e768d28..38f097669 100644
--- a/src/TNL/Containers/Segments/details/CSR.h
+++ b/src/TNL/Containers/Segments/details/CSR.h
@@ -35,23 +35,48 @@ class CSR
          offsets.template scan< Algorithms::ScanType::Exclusive >();
       }
 
-      /***
-       * \brief Returns size of the segment number \r segmentIdx
-       */
+      template< typename CSROffsets >
       __cuda_callable__
-      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+      static IndexType getSegmentsCount( const CSROffsets& offsets )
+      {
+         return offsets.getSize() - 1;
+      }
 
       /***
-       * \brief Returns number of elements managed by all segments.
+       * \brief Returns size of the segment number \r segmentIdx
        */
+      template< typename CSROffsets >
       __cuda_callable__
-      IndexType getSize() const;
+      static IndexType getSegmentSize( const CSROffsets& offsets, const IndexType segmentIdx )
+      {
+         if( ! std::is_same< DeviceType, Devices::Host >::value )
+         {
+#ifdef __CUDA_ARCH__
+            return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ];
+#else
+            return offsets.getElement( segmentIdx + 1 ) - offsets.getElement( segmentIdx );
+#endif
+         }
+         return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ];
+      }
 
       /***
        * \brief Returns number of elements that needs to be allocated.
        */
+      template< typename CSROffsets >
       __cuda_callable__
-      IndexType getStorageSize() const;
+      static IndexType getStorageSize( const CSROffsets& offsets )
+      {
+         if( ! std::is_same< DeviceType, Devices::Host >::value )
+         {
+#ifdef __CUDA_ARCH__
+            return offsets[ getSegmentsCount( offsets ) ];
+#else
+            return offsets.getElement( getSegmentsCount( offsets ) );
+#endif
+         }
+         return offsets[ getSegmentsCount( offsets ) ];
+      }
 
       __cuda_callable__
       IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
@@ -85,5 +110,3 @@ class CSR
       } // namespace Segements
    }  // namespace Conatiners
 } // namespace TNL
-
-#include <TNL/Containers/Segments/CSR.hpp>
diff --git a/src/TNL/Containers/Segments/details/Ellpack.h b/src/TNL/Containers/Segments/details/Ellpack.h
index b08ad0f04..ecfe63107 100644
--- a/src/TNL/Containers/Segments/details/Ellpack.h
+++ b/src/TNL/Containers/Segments/details/Ellpack.h
@@ -103,5 +103,3 @@ class Ellpack
       } // namespace Segements
    }  // namespace Conatiners
 } // namespace TNL
-
-#include <TNL/Containers/Segments/Ellpack.hpp>
diff --git a/src/TNL/Containers/Segments/details/SlicedEllpack.h b/src/TNL/Containers/Segments/details/SlicedEllpack.h
index ecc2c8c7e..6f185bc46 100644
--- a/src/TNL/Containers/Segments/details/SlicedEllpack.h
+++ b/src/TNL/Containers/Segments/details/SlicedEllpack.h
@@ -102,5 +102,3 @@ class SlicedEllpack
       } // namespace Segements
    }  // namespace Conatiners
 } // namespace TNL
-
-#include <TNL/Containers/Segments/SlicedEllpack.hpp>
-- 
GitLab


From a87203a0b91da16f5e16b379e33ee09a8e2a1c57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sun, 22 Dec 2019 20:52:43 +0100
Subject: [PATCH 033/179] Implementing sparse matrix assignment.

---
 src/TNL/Containers/Segments/CSR.h             |  6 +-
 src/TNL/Containers/Segments/CSR.hpp           | 11 +++
 src/TNL/Containers/Segments/Ellpack.h         |  5 ++
 src/TNL/Containers/Segments/Ellpack.hpp       | 15 ++++
 src/TNL/Containers/Segments/SlicedEllpack.h   |  5 ++
 src/TNL/Containers/Segments/SlicedEllpack.hpp | 17 ++++
 src/TNL/Matrices/SparseMatrix.h               |  2 +-
 src/TNL/Matrices/SparseMatrix.hpp             | 84 +++++++++++++++----
 src/UnitTests/Matrices/SparseMatrixCopyTest.h | 32 +++++--
 9 files changed, 156 insertions(+), 21 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h
index b83e43f1d..add07f1df 100644
--- a/src/TNL/Containers/Segments/CSR.h
+++ b/src/TNL/Containers/Segments/CSR.h
@@ -93,7 +93,6 @@ class CSR
       template< typename Function, typename... Args >
       void forAll( Function& f, Args... args ) const;
 
-
       /***
        * \brief Go over all segments and perform a reduction in each of them.
        */
@@ -103,6 +102,11 @@ class CSR
       template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
       void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
 
+      CSR& operator=( const CSR& rhsSegments ) = default;
+
+      template< typename Device_, typename Index_, typename IndexAllocator_ >
+      CSR& operator=( const CSR< Device_, Index_, IndexAllocator_ >& source );
+
       void save( File& file ) const;
 
       void load( File& file );
diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index 280ed6ebf..61720869c 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -221,6 +221,17 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re
    this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
 }
 
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+   template< typename Device_, typename Index_, typename IndexAllocator_ >
+CSR< Device, Index, IndexAllocator >&
+CSR< Device, Index, IndexAllocator >::
+operator=( const CSR< Device_, Index_, IndexAllocator_ >& source )
+{
+   this->offsets = source.offsets;
+}
+
 template< typename Device,
           typename Index,
           typename IndexAllocator >
diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h
index 9c81a8428..b9b3e63c1 100644
--- a/src/TNL/Containers/Segments/Ellpack.h
+++ b/src/TNL/Containers/Segments/Ellpack.h
@@ -100,6 +100,11 @@ class Ellpack
       template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
       void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
 
+      Ellpack& operator=( const Ellpack& source ) = default;
+
+      template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_, int Alignment_ >
+      Ellpack& operator=( const Ellpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, Alignment_ >& source );
+
       void save( File& file ) const;
 
       void load( File& file );
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index 482c87d4f..97d30d314 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -322,6 +322,21 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re
    this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
 }
 
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+   template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_, int Alignment_ >
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >&
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+operator=( const Ellpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, Alignment_ >& source )
+{
+   this->segmentSize = source.segmentSize;
+   this->size = source.size;
+   this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment();
+}
+
 template< typename Device,
           typename Index,
           typename IndexAllocator,
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h
index fc514c51f..9c2e7157f 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.h
+++ b/src/TNL/Containers/Segments/SlicedEllpack.h
@@ -96,6 +96,11 @@ class SlicedEllpack
       template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
       void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
 
+      SlicedEllpack& operator=( const SlicedEllpack& source ) = default;
+
+      template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ >
+      SlicedEllpack& operator=( const SlicedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, SliceSize >& source );
+
       void save( File& file ) const;
 
       void load( File& file );
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index bdf28ff73..ad83f666a 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -356,6 +356,23 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re
    this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
 }
 
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ >
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >&
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+operator=( const SlicedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, SliceSize >& source )
+{
+   this->size = source.size;
+   this->alignedSize = source.alignedSize;
+   this->segmentsCount = source.segmentsCount;
+   this->sliceOffsets = source.sliceOffsets;
+   this->sliceSegmentSizes = source.sliceSegmentSizes;
+}
+
 template< typename Device,
           typename Index,
           typename IndexAllocator,
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 1512f8574..8c8fef599 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -218,7 +218,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 
       SegmentsType segments;
 
-      IndexAllocator indexAlloctor;
+      IndexAllocator indexAllocator;
 
       RealAllocator realAllocator;
 
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index e24ed2f44..5de4473ab 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -680,7 +680,7 @@ operator=( const SparseMatrix& matrix )
    Matrix< Real, Device, Index >::operator=( matrix );
    this->columnIndexes = matrix.columnIndexes;
    this->segments = matrix.segments;
-   this->indexAlloctor = matrix.indexAllocator;
+   this->indexAllocator = matrix.indexAllocator;
    this->realAllocator = matrix.realAllocator;
 }
 
@@ -702,29 +702,85 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix )
 {
    using RHSMatrixType = SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >;
+   RowsCapacitiesType rowLengths;
+   matrix.getCompressedRowLengths( rowLengths );
+   this->setCompressedRowLengths( rowLengths );
+
+   // TODO: Replace this with SparseMatrixView
+   const auto matrix_columns_view = matrix.columnIndexes.getConstView();
+   const auto matrix_values_view = matrix.values.getConstView();
+   const IndexType paddingIndex = this->getPaddingIndex();
+   auto this_columns_view = this->columnIndexes.getView();
+   auto this_values_view = this->values.getView();
+
    if( std::is_same< Device, Device2 >::value )
    {
-      /*RowsCapacitiesType rowLengths;
-      matrix.getCompressedRowLengths( rowLengths );
-      this->setCompressedRowLengths( rowLengths );
-      // TODO: Replace this with SparseMatrixView
-      const auto matrix_columns_view = matrix.columnIndexes.getConstView();
-      const auto matrix_values_view = matrix.values.getConstView();
-      const auto segments_view = this->segments.getConstView();
-      auto this_columns_view = this->columnIndexes.getView();
-      auto this_values_view = this->values.getView();
-      const IndexType paddingIndex = this->getPaddingIndex();
-      auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) {
+      const auto this_segments_view = this->segments.getView();
+      auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable {
          const IndexType column = matrix_columns_view[ globalIdx ];
          if( column != paddingIndex )
          {
             const RealType value = matrix_values_view[ globalIdx ];
-            IndexType thisGlobalIdx = segments_view.getGlobalIdx( rowIdx, localIdx );
+            IndexType thisGlobalIdx = this_segments_view.getGlobalIndex( rowIdx, localIdx );
             this_columns_view[ thisGlobalIdx ] = column;
             this_values_view[ thisGlobalIdx ] = value;
          }
       };
-      matrix.forAllRows( f );*/
+      matrix.forAllRows( f );
+   }
+   else
+   {
+      const IndexType maxRowLength = max( rowLengths );
+      const IndexType bufferRowsCount( 128 );
+      const size_t bufferSize = bufferRowsCount * maxRowLength;
+      Containers::Vector< Real2, Device2, Index2, RealAllocator2 > matrixValuesBuffer( bufferSize );
+      Containers::Vector< Index2, Device2, Index2, IndexAllocator2 > matrixColumnsBuffer( bufferSize );
+      Containers::Vector< RealType, DeviceType, IndexType, RealAllocator > thisValuesBuffer( bufferSize );
+      Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocator > thisColumnsBuffer( bufferSize );
+      auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
+      auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView();
+      auto thisValuesBuffer_view = thisValuesBuffer.getView();
+      auto thisColumnsBuffer_view = thisColumnsBuffer.getView();
+
+      IndexType baseRow( 0 );
+      const IndexType rowsCount = this->getRows();
+      while( baseRow < rowsCount )
+      {
+         const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
+         thisColumnsBuffer = paddingIndex;
+
+         ////
+         // Copy matrix elements into buffer
+         auto f1 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable {
+            const IndexType column = matrix_columns_view[ globalIdx ];
+            if( column != paddingIndex )
+            {
+               const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
+               matrixValuesBuffer_view[ bufferIdx ] = matrix_values_view[ globalIdx ];
+               matrixColumnsBuffer_view[ bufferIdx ] = column;
+            }
+         };
+         matrix.forRows( baseRow, lastRow, f1 );
+
+         ////
+         // Copy the source matrix buffer to this matrix buffer
+         thisValuesBuffer_view = matrixValuesBuffer_view;
+         thisColumnsBuffer_view = matrixColumnsBuffer_view;
+
+         ////
+         // Copy matrix elements from the buffer to the matrix
+         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable {
+            const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
+            const IndexType column = thisColumnsBuffer_view[ bufferIdx ];
+            if( column != paddingIndex )
+            {
+               this_columns_view[ globalIdx ] = column;
+               this_values_view[ globalIdx ] = thisValuesBuffer_view[ bufferIdx ];
+            }
+         };
+         this->forRows( baseRow, lastRow, f2 );
+         baseRow += bufferRowsCount;
+      }
    }
 }
 
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index 9b09ef4d4..684a6a871 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -12,12 +12,31 @@
 #include <TNL/Matrices/Ellpack.h>
 #include <TNL/Matrices/SlicedEllpack.h>
 
-using CSR_host = TNL::Matrices::CSR< int, TNL::Devices::Host, int >;
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Containers/Segments/SlicedEllpack.h>
+
+/*using CSR_host = TNL::Matrices::CSR< int, TNL::Devices::Host, int >;
 using CSR_cuda = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >;
 using E_host = TNL::Matrices::Ellpack< int, TNL::Devices::Host, int >;
 using E_cuda = TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, int >;
 using SE_host = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, int, 2 >;
-using SE_cuda = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int, 2 >;
+using SE_cuda = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int, 2 >;*/
+
+template< typename Device, typename Index, typename IndexAllocator >
+using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >;
+
+template< typename Device, typename Index, typename IndexAllocator >
+using SlicedEllpackSegments = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >;
+
+using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Host, int >;
+using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int >;
+using E_host   = TNL::Matrices::SparseMatrix< int, EllpackSegments, TNL::Devices::Host, int >;
+using E_cuda   = TNL::Matrices::SparseMatrix< int, EllpackSegments, TNL::Devices::Cuda, int >;
+using SE_host  = TNL::Matrices::SparseMatrix< int, SlicedEllpackSegments, TNL::Devices::Host, int >;
+using SE_cuda  = TNL::Matrices::SparseMatrix< int, SlicedEllpackSegments, TNL::Devices::Cuda, int >;
+
 
 #ifdef HAVE_GTEST 
 #include <gtest/gtest.h>
@@ -388,7 +407,8 @@ void testConversion()
         checkTriDiagMatrix( triDiag1 );
         
         Matrix2 triDiag2;
-        TNL::Matrices::copySparseMatrix( triDiag2, triDiag1 );
+        //TNL::Matrices::copySparseMatrix( triDiag2, triDiag1 );
+        triDiag2 = triDiag1;
         checkTriDiagMatrix( triDiag2 );
    }
    
@@ -400,7 +420,8 @@ void testConversion()
         checkAntiTriDiagMatrix( antiTriDiag1 );
         
         Matrix2 antiTriDiag2;
-        TNL::Matrices::copySparseMatrix( antiTriDiag2, antiTriDiag1 );
+        //TNL::Matrices::copySparseMatrix( antiTriDiag2, antiTriDiag1 );
+        antiTriDiag2 = antiTriDiag1;
         checkAntiTriDiagMatrix( antiTriDiag2 );
    }
    
@@ -411,7 +432,8 @@ void testConversion()
         checkUnevenRowSizeMatrix( unevenRowSize1 );
         
         Matrix2 unevenRowSize2;
-        TNL::Matrices::copySparseMatrix( unevenRowSize2, unevenRowSize1 );
+        //TNL::Matrices::copySparseMatrix( unevenRowSize2, unevenRowSize1 );
+        unevenRowSize2 = unevenRowSize1;
         checkUnevenRowSizeMatrix( unevenRowSize2 );
    }
 }
-- 
GitLab


From 127b3bc9b6108aba737b64368d9d3c16e941f556 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sun, 22 Dec 2019 23:40:13 +0100
Subject: [PATCH 034/179] Fixing sparse matrix assignment operator.

---
 src/TNL/Containers/Segments/SlicedEllpack.hpp |  4 +--
 src/TNL/Matrices/SparseMatrix.h               |  8 ++++-
 src/TNL/Matrices/SparseMatrix.hpp             | 29 ++++++++++++-------
 3 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index ad83f666a..c9c1d8560 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -255,7 +255,7 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
    const auto sliceOffsets_view = this->sliceOffsets.getConstView();
    if( RowMajorOrder )
    {
-      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
          const IndexType sliceIdx = segmentIdx / SliceSize;
          const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
          const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
@@ -270,7 +270,7 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
    }
    else
    {
-      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
          const IndexType sliceIdx = segmentIdx / SliceSize;
          const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
          const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 8c8fef599..44ded93a6 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -62,7 +62,13 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 
       virtual String getSerializationTypeVirtual() const;
 
-      void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
+      template< typename RowsCapacitiesVector >
+      void setCompressedRowLengths( const RowsCapacitiesVector& rowCapacities );
+
+      // TODO: Remove this when possible
+      void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) {
+         this->setCompressedRowLengths( rowLengths );
+      };
 
       template< typename Vector >
       void getCompressedRowLengths( Vector& rowLengths ) const;
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 5de4473ab..964e9eb22 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -104,12 +104,21 @@ template< typename Real,
           typename Index,
           typename RealAllocator,
           typename IndexAllocator >
+   template< typename RowsCapacitiesVector >
 void
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
-setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
+setCompressedRowLengths( const RowsCapacitiesVector& rowsCapacities )
 {
-   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "Number of matrix rows does not fit with rowLengths vector size." );
-   this->segments.setSegmentsSizes( rowLengths );
+   TNL_ASSERT_EQ( rowsCapacities.getSize(), this->getRows(), "Number of matrix rows does not fit with rowLengths vector size." );
+   using RowsCapacitiesVectorDevice = typename RowsCapacitiesVector::DeviceType;
+   if( std::is_same< DeviceType, RowsCapacitiesVectorDevice >::value )
+      this->segments.setSegmentsSizes( rowsCapacities );
+   else
+   {
+      RowsCapacitiesType thisRowsCapacities;
+      thisRowsCapacities = rowsCapacities;
+      this->segments.setSegmentsSizes( thisRowsCapacities );
+   }
    this->values.setSize( this->segments.getStorageSize() );
    this->values = ( RealType ) 0;
    this->columnIndexes.setSize( this->segments.getStorageSize() );
@@ -594,13 +603,11 @@ forRows( IndexType first, IndexType last, Function& function ) const
    const auto columns_view = this->columnIndexes.getConstView();
    const auto values_view = this->values.getConstView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
-   /*auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
-      IndexType columnIdx = columns_view[ globalIdx ];
-      if( columnIdx != paddingIndex_ )
-         return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
-      return zero;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool {
+      function( rowIdx, localIdx, globalIdx );
+      return true;
    };
-   this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );*/
+   this->segments.forSegments( first, last, f );
 
 }
 
@@ -702,8 +709,9 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix )
 {
    using RHSMatrixType = SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >;
-   RowsCapacitiesType rowLengths;
+   typename RHSMatrixType::RowsCapacitiesType rowLengths;
    matrix.getCompressedRowLengths( rowLengths );
+   this->setDimensions( matrix.getRows(), matrix.getColumns() );
    this->setCompressedRowLengths( rowLengths );
 
    // TODO: Replace this with SparseMatrixView
@@ -712,6 +720,7 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2
    const IndexType paddingIndex = this->getPaddingIndex();
    auto this_columns_view = this->columnIndexes.getView();
    auto this_values_view = this->values.getView();
+   this_columns_view = paddingIndex;
 
    if( std::is_same< Device, Device2 >::value )
    {
-- 
GitLab


From 5a68640d3ba8d308c3096cb55024acd2268dff38 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 23 Dec 2019 16:15:02 +0100
Subject: [PATCH 035/179] Fixed sparse matrix assignment operator.

---
 src/TNL/Containers/Segments/SlicedEllpackView.hpp | 2 +-
 src/TNL/Matrices/SparseMatrix.h                   | 2 +-
 src/TNL/Matrices/SparseMatrix.hpp                 | 8 +++++++-
 src/UnitTests/Matrices/SparseMatrixCopyTest.h     | 1 -
 4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
index f2e03bd38..66cfce195 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
@@ -171,7 +171,7 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
    }
    else
    {
-#ifdef __CUDA__ARCH__
+#ifdef __CUDA_ARCH__
       sliceOffset = this->sliceOffsets[ sliceIdx ];
       segmentSize = this->sliceSegmentSizes[ sliceIdx ];
 #else
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 44ded93a6..a5effce93 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -34,7 +34,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       using IndexType = Index;
       using RealAllocatorType = RealAllocator;
       using IndexAllocatorType = IndexAllocator;
-      using RowsCapacitiesType = Containers::Vector< IndexType, DeviceType, IndexType >;
+      using RowsCapacitiesType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
       using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >;
       using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
       using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector;
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 964e9eb22..75f505f5f 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -739,8 +739,9 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2
    }
    else
    {
+      //std::cerr << "Matrix = " << std::endl << matrix << std::endl;
       const IndexType maxRowLength = max( rowLengths );
-      const IndexType bufferRowsCount( 128 );
+      const IndexType bufferRowsCount( 8 );
       const size_t bufferSize = bufferRowsCount * maxRowLength;
       Containers::Vector< Real2, Device2, Index2, RealAllocator2 > matrixValuesBuffer( bufferSize );
       Containers::Vector< Index2, Device2, Index2, IndexAllocator2 > matrixColumnsBuffer( bufferSize );
@@ -757,6 +758,7 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2
       {
          const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
          thisColumnsBuffer = paddingIndex;
+         matrixColumnsBuffer_view = paddingIndex;
 
          ////
          // Copy matrix elements into buffer
@@ -765,12 +767,15 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2
             if( column != paddingIndex )
             {
                const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
+               //printf( ">>>RowIdx = %d GlobalIdx = %d  column = %d bufferIdx = %d \n", rowIdx, globalIdx, column, bufferIdx );
                matrixValuesBuffer_view[ bufferIdx ] = matrix_values_view[ globalIdx ];
                matrixColumnsBuffer_view[ bufferIdx ] = column;
             }
          };
          matrix.forRows( baseRow, lastRow, f1 );
 
+         //std::cerr << "Values = " << matrixValuesBuffer_view << std::endl;
+         //std::cerr << "Columns = " << matrixColumnsBuffer_view << std::endl;
          ////
          // Copy the source matrix buffer to this matrix buffer
          thisValuesBuffer_view = matrixValuesBuffer_view;
@@ -790,6 +795,7 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2
          this->forRows( baseRow, lastRow, f2 );
          baseRow += bufferRowsCount;
       }
+      //std::cerr << "This matrix = " << std::endl << *this << std::endl;
    }
 }
 
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index 684a6a871..e3fb505d3 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -438,7 +438,6 @@ void testConversion()
    }
 }
 
-
 TEST( SparseMatrixCopyTest, CSR_HostToHost )
 {
    testCopyAssignment< CSR_host, CSR_host >();
-- 
GitLab


From 2e1ddabef943ff457f13eb339509bf14fe7c9aee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 27 Dec 2019 20:31:39 +0100
Subject: [PATCH 036/179] Fixed sparse matrix assignment operator.

---
 src/TNL/Matrices/SparseMatrix.hpp             |  4 +-
 src/UnitTests/Matrices/SparseMatrixCopyTest.h | 53 +++++++++----------
 2 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 75f505f5f..68f33b93e 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -687,8 +687,7 @@ operator=( const SparseMatrix& matrix )
    Matrix< Real, Device, Index >::operator=( matrix );
    this->columnIndexes = matrix.columnIndexes;
    this->segments = matrix.segments;
-   this->indexAllocator = matrix.indexAllocator;
-   this->realAllocator = matrix.realAllocator;
+   return *this;
 }
 
 // cross-device copy assignment
@@ -797,6 +796,7 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2
       }
       //std::cerr << "This matrix = " << std::endl << *this << std::endl;
    }
+   return *this;
 }
 
 template< typename Real,
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index e3fb505d3..34ffd600d 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -360,38 +360,35 @@ template< typename Matrix1, typename Matrix2 >
 void testCopyAssignment()
 {
    {
-        SCOPED_TRACE("Tri Diagonal Matrix");
-        
-        Matrix1 triDiag1;
-        setupTriDiagMatrix( triDiag1 );
-        checkTriDiagMatrix( triDiag1 );
-        
-        Matrix2 triDiag2;
-        triDiag2 = triDiag1;
-        checkTriDiagMatrix( triDiag2 );
+      SCOPED_TRACE("Tri Diagonal Matrix");
+
+      Matrix1 triDiag1;
+      setupTriDiagMatrix( triDiag1 );
+      checkTriDiagMatrix( triDiag1 );
+
+      Matrix2 triDiag2;
+      triDiag2 = triDiag1;
+      checkTriDiagMatrix( triDiag2 );
    }
-   
    {
-        SCOPED_TRACE("Anti Tri Diagonal Matrix");
-                
-        Matrix1 antiTriDiag1;
-        setupAntiTriDiagMatrix( antiTriDiag1 );
-        checkAntiTriDiagMatrix( antiTriDiag1 );
-        
-        Matrix2 antiTriDiag2;
-        antiTriDiag2 = antiTriDiag1;
-        checkAntiTriDiagMatrix( antiTriDiag2 );
+      SCOPED_TRACE("Anti Tri Diagonal Matrix");
+      Matrix1 antiTriDiag1;
+      setupAntiTriDiagMatrix( antiTriDiag1 );
+      checkAntiTriDiagMatrix( antiTriDiag1 );
+
+      Matrix2 antiTriDiag2;
+      antiTriDiag2 = antiTriDiag1;
+      checkAntiTriDiagMatrix( antiTriDiag2 );
    }
-   
    {
-        SCOPED_TRACE("Uneven Row Size Matrix");
-        Matrix1 unevenRowSize1;
-        setupUnevenRowSizeMatrix( unevenRowSize1 );
-        checkUnevenRowSizeMatrix( unevenRowSize1 );
-        
-        Matrix2 unevenRowSize2;
-        unevenRowSize2 = unevenRowSize1;
-        checkUnevenRowSizeMatrix( unevenRowSize2 );
+      SCOPED_TRACE("Uneven Row Size Matrix");
+      Matrix1 unevenRowSize1;
+      setupUnevenRowSizeMatrix( unevenRowSize1 );
+      checkUnevenRowSizeMatrix( unevenRowSize1 );
+
+      Matrix2 unevenRowSize2;
+      unevenRowSize2 = unevenRowSize1;
+      checkUnevenRowSizeMatrix( unevenRowSize2 );
    }
 }
 
-- 
GitLab


From 5eb09d2d627578b16ac5b8e717e4957c6ee6a2bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 27 Dec 2019 21:19:22 +0100
Subject: [PATCH 037/179] SpMV benchmark is printing full matrix types.

---
 src/Benchmarks/SpMV/spmv.h | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 66f4fb236..26ef145c9 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -159,7 +159,7 @@ benchmarkSpMV( Benchmark& benchmark,
           { "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) },
           { "rows", convertToString( hostMatrix.getRows() ) },
           { "columns", convertToString( hostMatrix.getColumns() ) },
-          { "matrix format", convertToString( getMatrixFormat( hostMatrix ) ) }
+          { "matrix format", convertToString( getType( hostMatrix ) ) }
        } ));
 
     hostVector.setSize( hostMatrix.getColumns() );
@@ -287,22 +287,18 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
 {
    benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR );
    benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR );
-   benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR );
+   //benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR );
+   //benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR );
 
    ////
    // Segments based sparse matrices
-   std::cerr << "*********************************" << std::endl;
    benchmarkSpMV< Real, SparseMatrix_CSR >( benchmark, inputFileName, verboseMR );
-   std::cerr << "*********************************" << std::endl;
    benchmarkSpMV< Real, SparseMatrix_Ellpack >( benchmark, inputFileName, verboseMR );
-   std::cerr << "*********************************" << std::endl;
-   benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, inputFileName, verboseMR );
-   std::cerr << "*********************************" << std::endl;
+   //benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, inputFileName, verboseMR );
 
    // AdEllpack is broken
    // benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR );
-   benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR );
+   //benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR );
 }
 
 } // namespace Benchmarks
-- 
GitLab


From 15308a7caae4c6c0605b1ab1cf2c43fd592f1dff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sat, 28 Dec 2019 11:53:37 +0100
Subject: [PATCH 038/179] Added template parameter MatrixType to SparseMatrix
 and rearrangement of the template parameters.

---
 src/Benchmarks/SpMV/spmv.h                    |   7 +-
 src/TNL/Matrices/SparseMatrix.h               |  15 +-
 src/TNL/Matrices/SparseMatrix.hpp             | 227 +++++++++++-------
 src/UnitTests/Matrices/SparseMatrixCopyTest.h |  13 +-
 .../Matrices/SparseMatrixTest_CSR_segments.h  |  48 ++--
 .../SparseMatrixTest_Ellpack_segments.h       |  48 ++--
 .../SparseMatrixTest_SlicedEllpack_segments.h |  49 ++--
 7 files changed, 228 insertions(+), 179 deletions(-)

diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 26ef145c9..8a1b0614e 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -27,6 +27,7 @@
 #include <TNL/Matrices/MatrixReader.h>
 
 #include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Matrices/MatrixType.h>
 #include <TNL/Containers/Segments/CSR.h>
 #include <TNL/Containers/Segments/Ellpack.h>
 #include <TNL/Containers/Segments/SlicedEllpack.h>
@@ -43,19 +44,19 @@ using SlicedEllpackAlias = Matrices::SlicedEllpack< Real, Device, Index >;
 
 // Segments based sparse matrix aliases
 template< typename Real, typename Device, typename Index >
-using SparseMatrix_CSR = Matrices::SparseMatrix< Real, Containers::Segments::CSR, Device, Index >;
+using SparseMatrix_CSR = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, Containers::Segments::CSR >;
 
 template< typename Device, typename Index, typename IndexAllocator >
 using EllpackSegments = Containers::Segments::Ellpack< Device, Index, IndexAllocator >;
 
 template< typename Real, typename Device, typename Index >
-using SparseMatrix_Ellpack = Matrices::SparseMatrix< Real, EllpackSegments, Device, Index >;
+using SparseMatrix_Ellpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, EllpackSegments >;
 
 template< typename Device, typename Index, typename IndexAllocator >
 using SlicedEllpackSegments = Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >;
 
 template< typename Real, typename Device, typename Index >
-using SparseMatrix_SlicedEllpack = Matrices::SparseMatrix< Real, SlicedEllpackSegments, Device, Index >;
+using SparseMatrix_SlicedEllpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, SlicedEllpackSegments >;
 
 // Get the name (with extension) of input matrix file
 std::string getMatrixFileName( const String& InputFileName )
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index a5effce93..0d8527daf 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -11,15 +11,18 @@
 #pragma once
 
 #include <TNL/Matrices/Matrix.h>
+#include <TNL/Matrices/MatrixType.h>
 #include <TNL/Allocators/Default.h>
+#include <TNL/Containers/Segments/CSR.h>
 
 namespace TNL {
 namespace Matrices {
 
 template< typename Real,
-          template< typename Device_, typename Index_, typename IndexAllocator_ > class Segments,
           typename Device = Devices::Host,
           typename Index = int,
+          typename MatrixType = GeneralMatrix,
+          template< typename Device_, typename Index_, typename IndexAllocator_ > class Segments = Containers::Segments::CSR,
           typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >,
           typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > >
 class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
@@ -45,6 +48,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
       typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
 
+      static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
 
       SparseMatrix( const RealAllocatorType& realAllocator = RealAllocatorType(),
                     const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
@@ -83,8 +87,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       __cuda_callable__
       IndexType getNonZeroRowLengthFast( const IndexType row ) const;
 
-      template< typename Real2, template< typename, typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 >
-      void setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix );
+      template< typename Real2, typename Device2, typename Index2, typename MatrixType2, template< typename, typename, typename > class Segments2, typename RealAllocator2, typename IndexAllocator2 >
+      void setLike( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >& matrix );
 
       IndexType getNumberOfNonzeroMatrixElements() const;
 
@@ -197,12 +201,13 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 
       // cross-device copy assignment
       template< typename Real2,
-                template< typename, typename, typename > class Segments2,
                 typename Device2,
                 typename Index2,
+                typename MatrixType2,
+                template< typename, typename, typename > class Segments2,
                 typename RealAllocator2,
                 typename IndexAllocator2 >
-      SparseMatrix& operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix );
+      SparseMatrix& operator=( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >& matrix );
 
       void save( File& file ) const;
 
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 68f33b93e..b8091d307 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -17,13 +17,14 @@
 namespace TNL {
 namespace Matrices {
 
-   template< typename Real,
-          template< typename, typename, typename > class Segments,
+template< typename Real,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 SparseMatrix( const RealAllocatorType& realAllocator,
               const IndexAllocatorType& indexAllocator )
    : Matrix< Real, Device, Index, RealAllocator >( realAllocator ), columnIndexes( indexAllocator )
@@ -31,36 +32,39 @@ SparseMatrix( const RealAllocatorType& realAllocator,
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 SparseMatrix( const SparseMatrix& m )
    : Matrix< Real, Device, Index, RealAllocator >( m ), columnIndexes( m.columnIndexes )
 {
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 SparseMatrix( const SparseMatrix&& m )
    : Matrix< Real, Device, Index, RealAllocator >( std::move( m ) ), columnIndexes( std::move( m.columnIndexes ) )
 {
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 SparseMatrix( const IndexType rows,
               const IndexType columns,
               const RealAllocatorType& realAllocator,
@@ -70,13 +74,14 @@ SparseMatrix( const IndexType rows,
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 String
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getSerializationType()
 {
    return String( "Matrices::SparseMatrix< " ) +
@@ -86,27 +91,29 @@ getSerializationType()
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 String
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getSerializationTypeVirtual() const
 {
    return this->getSerializationType();
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
    template< typename RowsCapacitiesVector >
 void
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 setCompressedRowLengths( const RowsCapacitiesVector& rowsCapacities )
 {
    TNL_ASSERT_EQ( rowsCapacities.getSize(), this->getRows(), "Number of matrix rows does not fit with rowLengths vector size." );
@@ -126,14 +133,15 @@ setCompressedRowLengths( const RowsCapacitiesVector& rowsCapacities )
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
    template< typename Vector >
 void
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getCompressedRowLengths( Vector& rowLengths ) const
 {
    rowLengths.setSize( this->getRows() );
@@ -152,81 +160,87 @@ getCompressedRowLengths( Vector& rowLengths ) const
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 Index
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getRowLength( const IndexType row ) const
 {
 
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 __cuda_callable__
 Index
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getRowLengthFast( const IndexType row ) const
 {
 
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 Index
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getNonZeroRowLength( const IndexType row ) const
 {
 
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 __cuda_callable__
 Index
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getNonZeroRowLengthFast( const IndexType row ) const
 {
 
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
-   template< typename Real2, template< typename, typename, typename > class Segments2,  typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 >
+   template< typename Real2, typename Device2, typename Index2, typename MatrixType2, template< typename, typename, typename > class Segments2, typename RealAllocator2, typename IndexAllocator2 >
 void
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
-setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix )
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+setLike( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >& matrix )
 {
    Matrix< Real, Device, Index, RealAllocator >::setLike( matrix );
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 Index
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getNumberOfNonzeroMatrixElements() const
 {
    const auto columns_view = this->columnIndexes.getConstView();
@@ -238,13 +252,14 @@ getNumberOfNonzeroMatrixElements() const
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 void
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 reset()
 {
    Matrix< Real, Device, Index >::reset();
@@ -253,14 +268,15 @@ reset()
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 __cuda_callable__
 bool
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 setElementFast( const IndexType row,
                 const IndexType column,
                 const RealType& value )
@@ -269,13 +285,14 @@ setElementFast( const IndexType row,
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 bool
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 setElement( const IndexType row,
             const IndexType column,
             const RealType& value )
@@ -284,14 +301,15 @@ setElement( const IndexType row,
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 __cuda_callable__
 bool
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 addElementFast( const IndexType row,
                 const IndexType column,
                 const RealType& value,
@@ -301,13 +319,14 @@ addElementFast( const IndexType row,
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 bool
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 addElement( const IndexType row,
             const IndexType column,
             const RealType& value,
@@ -367,14 +386,15 @@ addElement( const IndexType row,
 
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 __cuda_callable__
 bool
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 setRowFast( const IndexType row,
             const IndexType* columnIndexes,
             const RealType* values,
@@ -383,13 +403,14 @@ setRowFast( const IndexType row,
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 bool
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 setRow( const IndexType row,
         const IndexType* columnIndexes,
         const RealType* values,
@@ -412,14 +433,15 @@ setRow( const IndexType row,
 
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 __cuda_callable__
 bool
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 addRowFast( const IndexType row,
             const IndexType* columns,
             const RealType* values,
@@ -430,13 +452,14 @@ addRowFast( const IndexType row,
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 bool
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 addRow( const IndexType row,
         const IndexType* columns,
         const RealType* values,
@@ -446,16 +469,16 @@ addRow( const IndexType row,
 
 }
 
-
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 __cuda_callable__
 Real
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getElementFast( const IndexType row,
                 const IndexType column ) const
 {
@@ -463,13 +486,14 @@ getElementFast( const IndexType row,
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 Real
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getElement( const IndexType row,
             const IndexType column ) const
 {
@@ -486,14 +510,15 @@ getElement( const IndexType row,
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 __cuda_callable__
 void
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getRowFast( const IndexType row,
             IndexType* columns,
             RealType* values ) const
@@ -502,15 +527,16 @@ getRowFast( const IndexType row,
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
    template< typename Vector >
 __cuda_callable__
 typename Vector::RealType
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 rowVectorProduct( const IndexType row,
                   const Vector& vector ) const
 {
@@ -518,15 +544,16 @@ rowVectorProduct( const IndexType row,
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 template< typename InVector,
        typename OutVector >
 void
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 vectorProduct( const InVector& inVector,
                OutVector& outVector,
                const RealType& matrixMultiplicator,
@@ -553,14 +580,15 @@ vectorProduct( const InVector& inVector,
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
    template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
 void
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const
 {
    const auto columns_view = this->columnIndexes.getConstView();
@@ -576,28 +604,30 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
    this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
    template< typename Function >
 void
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 forRows( IndexType first, IndexType last, Function& function ) const
 {
    const auto columns_view = this->columnIndexes.getConstView();
@@ -612,14 +642,15 @@ forRows( IndexType first, IndexType last, Function& function ) const
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
    template< typename Function >
 void
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 forAllRows( Function& function ) const
 {
    this->forRows( 0, this->getRows(), function );
@@ -633,7 +664,7 @@ forAllRows( Function& function ) const
           typename IndexAllocator >
 template< typename Real2, template< typename, typename > class Segments2, typename Index2, typename RealAllocator2, typename IndexAllocator2 >
 void
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 addMatrix( const SparseMatrix< Real2, Segments2, Device, Index2, RealAllocator2, IndexAllocator2 >& matrix,
            const RealType& matrixMultiplicator,
            const RealType& thisMatrixMultiplicator )
@@ -649,7 +680,7 @@ template< typename Real,
           typename IndexAllocator >
 template< typename Real2, typename Index2 >
 void
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getTransposition( const SparseMatrix< Real2, Device, Index2 >& matrix,
                   const RealType& matrixMultiplicator )
 {
@@ -657,14 +688,15 @@ getTransposition( const SparseMatrix< Real2, Device, Index2 >& matrix,
 }*/
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 template< typename Vector1, typename Vector2 >
 bool
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 performSORIteration( const Vector1& b,
                      const IndexType row,
                      Vector2& x,
@@ -675,13 +707,14 @@ performSORIteration( const Vector1& b,
 
 // copy assignment
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >&
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >&
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 operator=( const SparseMatrix& matrix )
 {
    Matrix< Real, Device, Index >::operator=( matrix );
@@ -692,22 +725,24 @@ operator=( const SparseMatrix& matrix )
 
 // cross-device copy assignment
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
    template< typename Real2,
-             template< typename, typename, typename > class Segments2,
              typename Device2,
              typename Index2,
+             typename MatrixType2,
+             template< typename, typename, typename > class Segments2,
              typename RealAllocator2,
              typename IndexAllocator2 >
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >&
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
-operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix )
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >&
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+operator=( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >& matrix )
 {
-   using RHSMatrixType = SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >;
+   using RHSMatrixType = SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >;
    typename RHSMatrixType::RowsCapacitiesType rowLengths;
    matrix.getCompressedRowLengths( rowLengths );
    this->setDimensions( matrix.getRows(), matrix.getColumns() );
@@ -800,13 +835,14 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 void
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 save( File& file ) const
 {
    Matrix< RealType, DeviceType, IndexType >::save( file );
@@ -815,13 +851,14 @@ save( File& file ) const
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 void
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 load( File& file )
 {
    Matrix< RealType, DeviceType, IndexType >::load( file );
@@ -830,39 +867,42 @@ load( File& file )
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 void
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 save( const String& fileName ) const
 {
    Object::save( fileName );
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 void
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 load( const String& fileName )
 {
    Object::load( fileName );
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 void
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 print( std::ostream& str ) const
 {
    for( IndexType row = 0; row < this->getRows(); row++ )
@@ -882,14 +922,15 @@ print( std::ostream& str ) const
 }
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
 __cuda_callable__
 Index
-SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getPaddingIndex() const
 {
    return -1;
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index 34ffd600d..d100bb939 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -13,6 +13,7 @@
 #include <TNL/Matrices/SlicedEllpack.h>
 
 #include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Matrices/MatrixType.h>
 #include <TNL/Containers/Segments/CSR.h>
 #include <TNL/Containers/Segments/Ellpack.h>
 #include <TNL/Containers/Segments/SlicedEllpack.h>
@@ -30,12 +31,12 @@ using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, Index
 template< typename Device, typename Index, typename IndexAllocator >
 using SlicedEllpackSegments = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >;
 
-using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Host, int >;
-using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int >;
-using E_host   = TNL::Matrices::SparseMatrix< int, EllpackSegments, TNL::Devices::Host, int >;
-using E_cuda   = TNL::Matrices::SparseMatrix< int, EllpackSegments, TNL::Devices::Cuda, int >;
-using SE_host  = TNL::Matrices::SparseMatrix< int, SlicedEllpackSegments, TNL::Devices::Host, int >;
-using SE_cuda  = TNL::Matrices::SparseMatrix< int, SlicedEllpackSegments, TNL::Devices::Cuda, int >;
+using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >;
+using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >;
+using E_host   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
+using E_cuda   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
+using SE_host  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;
+using SE_cuda  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;
 
 
 #ifdef HAVE_GTEST 
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
index 0718e3a69..353dcdbb0 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
@@ -29,31 +29,31 @@ protected:
 // types for which MatrixTest is instantiated
 using CSRMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::CSR, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::CSR, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::CSR, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::CSR, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::CSR, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::CSR, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::CSR, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::CSR, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::CSR, TNL::Devices::Host, long  >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::CSR, TNL::Devices::Host, long  >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::CSR, TNL::Devices::Host, long  >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::CSR, TNL::Devices::Host, long  >
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Containers::Segments::CSR, TNL::Devices::Cuda, long  >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Containers::Segments::CSR, TNL::Devices::Cuda, long  >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Containers::Segments::CSR, TNL::Devices::Cuda, long  >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Containers::Segments::CSR, TNL::Devices::Cuda, long  >
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >
 #endif
 >;
 
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
index 16c22d9ca..b7dc33834 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
@@ -40,31 +40,31 @@ using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, In
 // types for which MatrixTest is instantiated
 using EllpackMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::SparseMatrix< int,     RowMajorEllpack, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< long,    RowMajorEllpack, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< float,   RowMajorEllpack, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< double,  RowMajorEllpack, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< int,     RowMajorEllpack, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< long,    RowMajorEllpack, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< float,   RowMajorEllpack, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< double,  RowMajorEllpack, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< int,     RowMajorEllpack, TNL::Devices::Host, long  >,
-    TNL::Matrices::SparseMatrix< long,    RowMajorEllpack, TNL::Devices::Host, long  >,
-    TNL::Matrices::SparseMatrix< float,   RowMajorEllpack, TNL::Devices::Host, long  >,
-    TNL::Matrices::SparseMatrix< double,  RowMajorEllpack, TNL::Devices::Host, long  >
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::SparseMatrix< int,     ColumnMajorEllpack, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< long,    ColumnMajorEllpack, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< float,   ColumnMajorEllpack, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< double,  ColumnMajorEllpack, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< int,     ColumnMajorEllpack, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< long,    ColumnMajorEllpack, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< float,   ColumnMajorEllpack, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< double,  ColumnMajorEllpack, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< int,     ColumnMajorEllpack, TNL::Devices::Cuda, long  >,
-    TNL::Matrices::SparseMatrix< long,    ColumnMajorEllpack, TNL::Devices::Cuda, long  >,
-    TNL::Matrices::SparseMatrix< float,   ColumnMajorEllpack, TNL::Devices::Cuda, long  >,
-    TNL::Matrices::SparseMatrix< double,  ColumnMajorEllpack, TNL::Devices::Cuda, long  >
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >
 #endif
 >;
 
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
index 8597121e4..b2404fe68 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
@@ -10,6 +10,7 @@
 
 #include <TNL/Containers/Segments/SlicedEllpack.h>
 #include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Matrices/MatrixType.h>
 
 
 #include "SparseMatrixTest.hpp"
@@ -40,31 +41,31 @@ using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Devic
 // types for which MatrixTest is instantiated
 using SlicedEllpackMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::SparseMatrix< int,     RowMajorSlicedEllpack, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< long,    RowMajorSlicedEllpack, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< float,   RowMajorSlicedEllpack, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< double,  RowMajorSlicedEllpack, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< int,     RowMajorSlicedEllpack, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< long,    RowMajorSlicedEllpack, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< float,   RowMajorSlicedEllpack, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< double,  RowMajorSlicedEllpack, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< int,     RowMajorSlicedEllpack, TNL::Devices::Host, long  >,
-    TNL::Matrices::SparseMatrix< long,    RowMajorSlicedEllpack, TNL::Devices::Host, long  >,
-    TNL::Matrices::SparseMatrix< float,   RowMajorSlicedEllpack, TNL::Devices::Host, long  >,
-    TNL::Matrices::SparseMatrix< double,  RowMajorSlicedEllpack, TNL::Devices::Host, long  >
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::SparseMatrix< int,     ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< long,    ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< float,   ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< double,  ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< int,     ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< long,    ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< float,   ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< double,  ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< int,     ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long  >,
-    TNL::Matrices::SparseMatrix< long,    ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long  >,
-    TNL::Matrices::SparseMatrix< float,   ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long  >,
-    TNL::Matrices::SparseMatrix< double,  ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long  >
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >
 #endif
 >;
 
-- 
GitLab


From c1e68285eeea3f4908d4724a553f721a83e9254d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sat, 28 Dec 2019 11:59:31 +0100
Subject: [PATCH 039/179] Added MatrixType.h.

---
 src/TNL/Matrices/MatrixType.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 src/TNL/Matrices/MatrixType.h

diff --git a/src/TNL/Matrices/MatrixType.h b/src/TNL/Matrices/MatrixType.h
new file mode 100644
index 000000000..c5c8f6375
--- /dev/null
+++ b/src/TNL/Matrices/MatrixType.h
@@ -0,0 +1,27 @@
+/***************************************************************************
+                          MatrixType.h -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Matrices {
+
+struct GeneralMatrix
+{
+   static constexpr bool isSymmetric() { return false; }
+};
+
+struct SymmetricMatrix
+{
+   static constexpr bool isSymmetric() { return true; }
+};
+
+   } //namespace Matrices
+} //namespace TNL
\ No newline at end of file
-- 
GitLab


From 03949d09e39871858d3b7b1c64da75394596c1aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sat, 28 Dec 2019 13:33:43 +0100
Subject: [PATCH 040/179] Added MatrixView.

---
 src/TNL/Matrices/Matrix.h                     |  21 +-
 .../Matrices/{Matrix_impl.h => Matrix.hpp}    |  33 +-
 src/TNL/Matrices/MatrixView.h                 | 150 +++++++++
 src/TNL/Matrices/MatrixView.hpp               | 286 ++++++++++++++++++
 4 files changed, 477 insertions(+), 13 deletions(-)
 rename src/TNL/Matrices/{Matrix_impl.h => Matrix.hpp} (92%)
 create mode 100644 src/TNL/Matrices/MatrixView.h
 create mode 100644 src/TNL/Matrices/MatrixView.hpp

diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index 4a038eb2e..96409c89b 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -15,6 +15,7 @@
 #include <TNL/Devices/Host.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/VectorView.h>
+#include <TNL/Matrices/MatrixView.h>
 
 namespace TNL {
 /**
@@ -30,13 +31,15 @@ class Matrix : public Object
 {
 public:
    using RealType = Real;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-   typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
-   typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
-   typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
-   typedef Containers::Vector< RealType, DeviceType, IndexType, RealAllocator > ValuesVector;
+   using DeviceType = Device;
+   using IndexType = Index;
+   using CompressedRowLengthsVector = Containers::Vector< IndexType, DeviceType, IndexType >;
+   using CompressedRowLengthsVectorView = Containers::VectorView< IndexType, DeviceType, IndexType >;
+   using ConstCompressedRowLengthsVectorView = typename CompressedRowLengthsVectorView::ConstViewType;
+   using ValuesVector = Containers::Vector< RealType, DeviceType, IndexType, RealAllocator >;
    using RealAllocatorType = RealAllocator;
+   using ViewType = MatrixView< Real, Device, Index >;
+   using ConstViewType = MatrixView< std::add_const_t< Real >, Device, Index >;
 
    Matrix( const RealAllocatorType& allocator = RealAllocatorType() );
 
@@ -44,6 +47,10 @@ public:
            const IndexType columns,
            const RealAllocatorType& allocator = RealAllocatorType() );
 
+   ViewType getView();
+
+   ConstViewType getConstView() const;
+
    virtual void setDimensions( const IndexType rows,
                                const IndexType columns );
 
@@ -162,4 +169,4 @@ void MatrixVectorProductCuda( const Matrix& matrix,
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/Matrix_impl.h>
+#include <TNL/Matrices/Matrix.hpp>
diff --git a/src/TNL/Matrices/Matrix_impl.h b/src/TNL/Matrices/Matrix.hpp
similarity index 92%
rename from src/TNL/Matrices/Matrix_impl.h
rename to src/TNL/Matrices/Matrix.hpp
index a93c7a893..91b81ffcf 100644
--- a/src/TNL/Matrices/Matrix_impl.h
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -43,6 +43,28 @@ Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType
 {
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+auto
+Matrix< Real, Device, Index, RealAllocator >::
+getView() -> ViewType
+{
+   return ViewType( rows, columns, values.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+auto
+Matrix< Real, Device, Index, RealAllocator >::
+getConstView() const -> ConstViewType
+{
+   return ConstViewType( rows, columns, values.getConstView() );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -105,12 +127,11 @@ template< typename Real,
           typename RealAllocator >
 Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfNonzeroMatrixElements() const
 {
-    IndexType nonZeroElements( 0 );
-    for( IndexType i = 0; this->values.getSize(); i++ )
-        if( this->values.getElement( i ) != 0.0 )
-            nonZeroElements++;
-      
-    return nonZeroElements;
+   const auto values_view = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+      return ( values_view[ i ] != 0.0 );
+   };
+   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h
new file mode 100644
index 000000000..a2fa975cf
--- /dev/null
+++ b/src/TNL/Matrices/MatrixView.h
@@ -0,0 +1,150 @@
+/***************************************************************************
+                          MatrixView.h  -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Object.h>
+#include <TNL/Allocators/Default.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+
+namespace TNL {
+/**
+ * \brief Namespace for matrix formats.
+ */
+namespace Matrices {
+
+template< typename Real = double,
+          typename Device = Devices::Host,
+          typename Index = int >
+class MatrixView : public Object
+{
+public:
+   using RealType = Real;
+   typedef Device DeviceType;
+   typedef Index IndexType;
+   typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
+   typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
+   typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
+   typedef Containers::VectorView< RealType, DeviceType, IndexType > ValuesView;
+
+   __cuda_callable__
+   MatrixView();
+
+   __cuda_callable__
+   MatrixView( const IndexType rows,
+               const IndexType columns,
+               const ValuesView& values );
+
+   __cuda_callable__
+   MatrixView( const MatrixView& view ) = default;
+
+   virtual IndexType getRowLength( const IndexType row ) const = 0;
+
+   // TODO: implementation is not parallel
+   // TODO: it would be nice if padding zeros could be stripped
+   void getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const;
+
+   virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
+
+   IndexType getNumberOfMatrixElements() const;
+
+   virtual IndexType getNumberOfNonzeroMatrixElements() const = 0;
+
+   void reset();
+
+   __cuda_callable__
+   IndexType getRows() const;
+
+   __cuda_callable__
+   IndexType getColumns() const;
+
+   /****
+    * TODO: The fast variants of the following methods cannot be virtual.
+    * If they were, they could not be used in the CUDA kernels. If CUDA allows it
+    * in the future and it does not slow down, declare them as virtual here.
+    */
+
+   virtual bool setElement( const IndexType row,
+                            const IndexType column,
+                            const RealType& value ) = 0;
+
+   virtual bool addElement( const IndexType row,
+                            const IndexType column,
+                            const RealType& value,
+                            const RealType& thisElementMultiplicator = 1.0 ) = 0;
+
+   virtual bool setRow( const IndexType row,
+                        const IndexType* columns,
+                        const RealType* values,
+                        const IndexType numberOfElements ) = 0;
+
+   virtual bool addRow( const IndexType row,
+                        const IndexType* columns,
+                        const RealType* values,
+                        const IndexType numberOfElements,
+                        const RealType& thisElementMultiplicator = 1.0 ) = 0;
+
+   virtual Real getElement( const IndexType row,
+                            const IndexType column ) const = 0;
+
+   const ValuesView& getValues() const;
+
+   ValuesView& getValues();
+
+   // TODO: parallelize and optimize for sparse matrices
+   template< typename Matrix >
+   bool operator == ( const Matrix& matrix ) const;
+
+   template< typename Matrix >
+   bool operator != ( const Matrix& matrix ) const;
+
+   virtual void save( File& file ) const;
+
+   virtual void load( File& file );
+
+   virtual void print( std::ostream& str ) const;
+
+
+   // TODO: method for symmetric matrices, should not be in general Matrix interface
+   __cuda_callable__
+   const IndexType& getNumberOfColors() const;
+
+   // TODO: method for symmetric matrices, should not be in general Matrix interface
+   void computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector);
+
+   protected:
+
+   IndexType rows, columns;
+
+   ValuesView values;
+};
+
+template< typename Real, typename Device, typename Index >
+std::ostream& operator << ( std::ostream& str, const MatrixView< Real, Device, Index >& m )
+{
+   m.print( str );
+   return str;
+}
+
+/*
+template< typename Matrix,
+          typename InVector,
+          typename OutVector >
+void MatrixVectorProductCuda( const Matrix& matrix,
+                              const InVector& inVector,
+                              OutVector& outVector );
+*/
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/MatrixView.hpp>
diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp
new file mode 100644
index 000000000..bd3d9beae
--- /dev/null
+++ b/src/TNL/Matrices/MatrixView.hpp
@@ -0,0 +1,286 @@
+/***************************************************************************
+                          MatrixView.hpp  -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/Matrix.h>
+#include <TNL/Assert.h>
+#include <TNL/Cuda/LaunchHelpers.h>
+#include <TNL/Cuda/MemoryHelpers.h>
+#include <TNL/Cuda/SharedMemory.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+MatrixView< Real, Device, Index >::
+MatrixView()
+: rows( 0 ),
+  columns( 0 )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+MatrixView< Real, Device, Index >::
+MatrixView( const IndexType rows_, 
+            const IndexType columns_,
+            const ValuesView& values_ )
+ : rows( rows_ ), columns( columns_ ), values( values_ )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void MatrixView< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const
+{
+   rowLengths.setSize( this->getRows() );
+   getCompressedRowLengths( rowLengths.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void MatrixView< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
+{
+   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
+   for( IndexType row = 0; row < this->getRows(); row++ )
+      rowLengths.setElement( row, this->getRowLength( row ) );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Index MatrixView< Real, Device, Index >::getNumberOfMatrixElements() const
+{
+   return this->values.getSize();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Index MatrixView< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const
+{
+   const auto values_view = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+      return ( values_view[ i ] != 0.0 );
+   };
+   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Index MatrixView< Real, Device, Index >::getRows() const
+{
+   return this->rows;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Index MatrixView< Real, Device, Index >::getColumns() const
+{
+   return this->columns;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+const typename MatrixView< Real, Device, Index >::ValuesView&
+MatrixView< Real, Device, Index >::
+getValues() const
+{
+   return this->values;
+}
+   
+template< typename Real,
+          typename Device,
+          typename Index >
+typename MatrixView< Real, Device, Index >::ValuesView& 
+MatrixView< Real, Device, Index >::
+getValues()
+{
+   return this->values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void MatrixView< Real, Device, Index >::reset()
+{
+   this->rows = 0;
+   this->columns = 0;
+   this->values.reset();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename MatrixT >
+bool MatrixView< Real, Device, Index >::operator == ( const MatrixT& matrix ) const
+{
+   if( this->getRows() != matrix.getRows() ||
+       this->getColumns() != matrix.getColumns() )
+      return false;
+   for( IndexType row = 0; row < this->getRows(); row++ )
+      for( IndexType column = 0; column < this->getColumns(); column++ )
+         if( this->getElement( row, column ) != matrix.getElement( row, column ) )
+            return false;
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename MatrixT >
+bool MatrixView< Real, Device, Index >::operator != ( const MatrixT& matrix ) const
+{
+   return ! operator == ( matrix );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void MatrixView< Real, Device, Index >::save( File& file ) const
+{
+   Object::save( file );
+   file.save( &this->rows );
+   file.save( &this->columns );
+   file << this->values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void MatrixView< Real, Device, Index >::load( File& file )
+{
+   Object::load( file );
+   file.load( &this->rows );
+   file.load( &this->columns );
+   file >> this->values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void MatrixView< Real, Device, Index >::print( std::ostream& str ) const
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+const Index&
+MatrixView< Real, Device, Index >::
+getNumberOfColors() const
+{
+   return this->numberOfColors;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void 
+MatrixView< Real, Device, Index >::
+computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector)
+{
+    for( IndexType i = this->getRows() - 1; i >= 0; i-- )
+    {
+        // init color array
+        Containers::Vector< Index, Device, Index > usedColors;
+        usedColors.setSize( this->numberOfColors );
+        for( IndexType j = 0; j < this->numberOfColors; j++ )
+            usedColors.setElement( j, 0 );
+
+        // find all colors used in given row
+        for( IndexType j = i + 1; j < this->getColumns(); j++ )
+             if( this->getElement( i, j ) != 0.0 )
+                 usedColors.setElement( colorsVector.getElement( j ), 1 );
+
+        // find unused color
+        bool found = false;
+        for( IndexType j = 0; j < this->numberOfColors; j++ )
+            if( usedColors.getElement( j ) == 0 )
+            {
+                colorsVector.setElement( i, j );
+                found = true;
+                break;
+            }
+        if( !found )
+        {
+            colorsVector.setElement( i, this->numberOfColors );
+            this->numberOfColors++;
+        }
+    }
+}
+
+/*
+#ifdef HAVE_CUDA
+template< typename Matrix,
+          typename InVector,
+          typename OutVector >
+__global__ void MatrixVectorProductCudaKernel( const Matrix* matrix,
+                                               const InVector* inVector,
+                                               OutVector* outVector,
+                                               int gridIdx )
+{
+   static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" );
+   const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   if( rowIdx < matrix->getRows() )
+      ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector );
+}
+#endif
+
+template< typename Matrix,
+          typename InVector,
+          typename OutVector >
+void MatrixVectorProductCuda( const Matrix& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+{
+#ifdef HAVE_CUDA
+   typedef typename Matrix::IndexType IndexType;
+   Matrix* kernel_this = Cuda::passToDevice( matrix );
+   InVector* kernel_inVector = Cuda::passToDevice( inVector );
+   OutVector* kernel_outVector = Cuda::passToDevice( outVector );
+   dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
+   const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
+   const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
+   for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
+   {
+      if( gridIdx == cudaGrids - 1 )
+         cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
+      MatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>>
+                                     ( kernel_this,
+                                       kernel_inVector,
+                                       kernel_outVector,
+                                       gridIdx );
+      TNL_CHECK_CUDA_DEVICE;
+   }
+   Cuda::freeFromDevice( kernel_this );
+   Cuda::freeFromDevice( kernel_inVector );
+   Cuda::freeFromDevice( kernel_outVector );
+   TNL_CHECK_CUDA_DEVICE;
+#endif
+}
+*/
+
+} // namespace Matrices
+} // namespace TNL
-- 
GitLab


From 256e89f0dd80a852d21432985511887440148b85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sat, 28 Dec 2019 15:17:35 +0100
Subject: [PATCH 041/179] Added SparseMatrixView.

---
 src/TNL/Matrices/SparseMatrix.h       |   1 +
 src/TNL/Matrices/SparseMatrix.hpp     |   2 +-
 src/TNL/Matrices/SparseMatrixView.h   | 196 ++++++++
 src/TNL/Matrices/SparseMatrixView.hpp | 647 ++++++++++++++++++++++++++
 4 files changed, 845 insertions(+), 1 deletion(-)
 create mode 100644 src/TNL/Matrices/SparseMatrixView.h
 create mode 100644 src/TNL/Matrices/SparseMatrixView.hpp

diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 0d8527daf..5f02e9fde 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -14,6 +14,7 @@
 #include <TNL/Matrices/MatrixType.h>
 #include <TNL/Allocators/Default.h>
 #include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Matrices/SparseMatrixView.h>
 
 namespace TNL {
 namespace Matrices {
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index b8091d307..08eae92b4 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          SparseMatrix.h -  description
+                          SparseMatrix.hpp -  description
                              -------------------
     begin                : Nov 29, 2019
     copyright            : (C) 2019 by Tomas Oberhuber
diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h
new file mode 100644
index 000000000..b40d9c0c2
--- /dev/null
+++ b/src/TNL/Matrices/SparseMatrixView.h
@@ -0,0 +1,196 @@
+/***************************************************************************
+                          SparseMatrixView.h -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/Matrix.h>
+#include <TNL/Matrices/MatrixType.h>
+#include <TNL/Allocators/Default.h>
+#include <TNL/Containers/Segments/CSR.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real,
+          typename Device = Devices::Host,
+          typename Index = int,
+          typename MatrixType = GeneralMatrix,
+          template< typename Device_, typename Index_ > class SegmentsView = Containers::Segments::CSRView >
+class SparseMatrixView : public MatrixView< Real, Device, Index >
+{
+   public:
+
+      using RealType = Real;
+      template< typename Device_, typename Index_ >
+      using SegmentsViewTemplate = SegmentsView< Device_, Index_ >;
+      using SegmentsViewType = SegmentsView< Device, Index >;
+      using DeviceType = Device;
+      using IndexType = Index;
+      using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >;
+      using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
+      using ValuesViewType = typename MatrixView< Real, Device, Index >::ValuesView;
+      using ColumnsViewType = Containers::VectorView< IndexType, DeviceType, IndexType >;
+
+      // TODO: remove this - it is here only for compatibility with original matrix implementation
+      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
+      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
+      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
+
+      static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
+
+      __cuda_callable__
+      SparseMatrixView();
+
+      __cuda_callable__
+      SparseMatrixView( const IndexType rows,
+                        const IndexType columns,
+                        ValuesViewType& values,
+                        ColumnsViewType& columnIndexes,
+                        SegmentsViewType& segments );
+
+      __cuda_callable__
+      SparseMatrixView( const SparseMatrixView& m ) = default;
+
+      //__cuda_callable__
+      //SparseMatrixView( const SparseMatrixView&& m ) = default;
+
+      static String getSerializationType();
+
+      virtual String getSerializationTypeVirtual() const;
+
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
+
+      IndexType getRowLength( const IndexType row ) const;
+
+      __cuda_callable__
+      IndexType getRowLengthFast( const IndexType row ) const;
+
+      IndexType getNonZeroRowLength( const IndexType row ) const;
+
+      __cuda_callable__
+      IndexType getNonZeroRowLengthFast( const IndexType row ) const;
+
+      IndexType getNumberOfNonzeroMatrixElements() const;
+
+      void reset();
+
+      __cuda_callable__
+      bool setElementFast( const IndexType row,
+                           const IndexType column,
+                           const RealType& value );
+
+      bool setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
+
+      __cuda_callable__
+      bool addElementFast( const IndexType row,
+                           const IndexType column,
+                           const RealType& value,
+                           const RealType& thisElementMultiplicator = 1.0 );
+
+      bool addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator = 1.0 );
+
+
+      __cuda_callable__
+      bool setRowFast( const IndexType row,
+                       const IndexType* columnIndexes,
+                       const RealType* values,
+                       const IndexType elements );
+
+      bool setRow( const IndexType row,
+                   const IndexType* columnIndexes,
+                   const RealType* values,
+                   const IndexType elements );
+
+
+      __cuda_callable__
+      bool addRowFast( const IndexType row,
+                       const IndexType* columns,
+                       const RealType* values,
+                       const IndexType numberOfElements,
+                       const RealType& thisElementMultiplicator = 1.0 );
+
+      bool addRow( const IndexType row,
+                   const IndexType* columns,
+                   const RealType* values,
+                   const IndexType numberOfElements,
+                   const RealType& thisElementMultiplicator = 1.0 );
+
+
+      __cuda_callable__
+      RealType getElementFast( const IndexType row,
+                               const IndexType column ) const;
+
+      RealType getElement( const IndexType row,
+                           const IndexType column ) const;
+
+      __cuda_callable__
+      void getRowFast( const IndexType row,
+                       IndexType* columns,
+                       RealType* values ) const;
+
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
+
+      /***
+       * \brief This method computes outVector = matrixMultiplicator * ( *this ) * inVector + inVectorAddition * inVector
+       */
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector,
+                          const RealType& matrixMultiplicator = 1.0,
+                          const RealType& inVectorAddition = 0.0 ) const;
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
+
+      template< typename Function >
+      void forAllRows( Function& function ) const;
+
+      template< typename Vector1, typename Vector2 >
+      bool performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
+
+      void save( File& file ) const;
+
+      void save( const String& fileName ) const;
+
+      void print( std::ostream& str ) const;
+
+      __cuda_callable__
+      IndexType getPaddingIndex() const;
+
+   protected:
+
+      ColumnsViewType columnIndexes;
+
+      SegmentsViewType segments;
+};
+
+}  // namespace Conatiners
+} // namespace TNL
+
+#include <TNL/Matrices/SparseMatrixView.hpp>
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
new file mode 100644
index 000000000..0c49cd58d
--- /dev/null
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -0,0 +1,647 @@
+/***************************************************************************
+                          SparseMatrixView.hpp -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <functional>
+#include <TNL/Matrices/SparseMatrixView.h>
+#include <TNL/Algorithms/Reduction.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+SparseMatrixView()
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+SparseMatrixView( const IndexType rows,
+                  const IndexType columns,
+                  ValuesViewType& values,
+                  ColumnsViewType& columnIndexes,
+                  SegmentsViewType& segments )
+ : MatrixView< Real, Device, Index >( rows, columns, values ), columnIndexes( columnIndexes ), segments( segments )
+{  
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+String
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getSerializationType()
+{
+   return String( "Matrices::SparseMatrix< " ) +
+             TNL::getSerializationType< RealType >() + ", " +
+             TNL::getSerializationType< SegmentsView >() + ", [any_device], " +
+             TNL::getSerializationType< IndexType >() + ", [any_allocator] >";
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+String
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getSerializationTypeVirtual() const
+{
+   return this->getSerializationType();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+   template< typename Vector >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getCompressedRowLengths( Vector& rowLengths ) const
+{
+   rowLengths.setSize( this->getRows() );
+   rowLengths = 0;
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   this->allRowsReduction( fetch, reduce, keep, 0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+Index
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getRowLength( const IndexType row ) const
+{
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+Index
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getRowLengthFast( const IndexType row ) const
+{
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+Index
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getNonZeroRowLength( const IndexType row ) const
+{
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+Index
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getNonZeroRowLengthFast( const IndexType row ) const
+{
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+Index
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getNumberOfNonzeroMatrixElements() const
+{
+   const auto columns_view = this->columnIndexes.getConstView();
+   const IndexType paddingIndex = this->getPaddingIndex();
+   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+      return ( columns_view[ i ] != paddingIndex );
+   };
+   return Algorithms::Reduction< DeviceType >::reduce( this->columnIndexes.getSize(), std::plus<>{}, fetch, 0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+reset()
+{
+   Matrix< Real, Device, Index >::reset();
+   this->columnIndexes.reset();
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+bool
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+setElementFast( const IndexType row,
+                const IndexType column,
+                const RealType& value )
+{
+   return this->addElementFast( row, column, value, 0.0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+bool
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+setElement( const IndexType row,
+            const IndexType column,
+            const RealType& value )
+{
+   return this->addElement( row, column, value, 0.0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+bool
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+addElementFast( const IndexType row,
+                const IndexType column,
+                const RealType& value,
+                const RealType& thisElementMultiplicator )
+{
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+bool
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+addElement( const IndexType row,
+            const IndexType column,
+            const RealType& value,
+            const RealType& thisElementMultiplicator )
+{
+   TNL_ASSERT( row >= 0 && row < this->rows &&
+               column >= 0 && column < this->columns,
+               std::cerr << " row = " << row
+                    << " column = " << column
+                    << " this->rows = " << this->rows
+                    << " this->columns = " << this->columns );
+
+   const IndexType rowSize = this->segments.getSegmentSize( row );
+   IndexType col( this->getPaddingIndex() );
+   IndexType i;
+   IndexType globalIdx;
+   for( i = 0; i < rowSize; i++ )
+   {
+      globalIdx = this->segments.getGlobalIndex( row, i );
+      TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" );
+      col = this->columnIndexes.getElement( globalIdx );
+      if( col == column )
+      {
+         this->values.setElement( globalIdx, thisElementMultiplicator * this->values.getElement( globalIdx ) + value );
+         return true;
+      }
+      if( col == this->getPaddingIndex() || col > column )
+         break;
+   }
+   if( i == rowSize )
+      return false;
+   if( col == this->getPaddingIndex() )
+   {
+      this->columnIndexes.setElement( globalIdx, column );
+      this->values.setElement( globalIdx, value );
+      return true;
+   }
+   else
+   {
+      IndexType j = rowSize - 1;
+      while( j > i )
+      {
+         const IndexType globalIdx1 = this->segments.getGlobalIndex( row, j );
+         const IndexType globalIdx2 = this->segments.getGlobalIndex( row, j - 1 );
+         TNL_ASSERT_LT( globalIdx1, this->columnIndexes.getSize(), "" );
+         TNL_ASSERT_LT( globalIdx2, this->columnIndexes.getSize(), "" );
+         this->columnIndexes.setElement( globalIdx1, this->columnIndexes.getElement( globalIdx2 ) );
+         this->values.setElement( globalIdx1, this->values.getElement( globalIdx2 ) );
+         j--;
+      }
+
+      this->columnIndexes.setElement( globalIdx, column );
+      this->values.setElement( globalIdx, value );
+      return true;
+   }
+}
+
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+bool
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+setRowFast( const IndexType row,
+            const IndexType* columnIndexes,
+            const RealType* values,
+            const IndexType elements )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+bool
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+setRow( const IndexType row,
+        const IndexType* columnIndexes,
+        const RealType* values,
+        const IndexType elements )
+{
+   const IndexType rowLength = this->segments.getSegmentSize( row );
+   if( elements > rowLength )
+      return false;
+
+   for( IndexType i = 0; i < elements; i++ )
+   {
+      const IndexType globalIdx = this->segments.getGlobalIndex( row, i );
+      this->columnIndexes.setElement( globalIdx, columnIndexes[ i ] );
+      this->values.setElement( globalIdx, values[ i ] );
+   }
+   for( IndexType i = elements; i < rowLength; i++ )
+      this->columnIndexes.setElement( this->segments.getGlobalIndex( row, i ), this->getPaddingIndex() );
+   return true;
+}
+
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+bool
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+addRowFast( const IndexType row,
+            const IndexType* columns,
+            const RealType* values,
+            const IndexType numberOfElements,
+            const RealType& thisElementMultiplicator )
+{
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+bool
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+addRow( const IndexType row,
+        const IndexType* columns,
+        const RealType* values,
+        const IndexType numberOfElements,
+        const RealType& thisElementMultiplicator )
+{
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+Real
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getElementFast( const IndexType row,
+                const IndexType column ) const
+{
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+Real
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getElement( const IndexType row,
+            const IndexType column ) const
+{
+   const IndexType rowSize = this->segments.getSegmentSize( row );
+   for( IndexType i = 0; i < rowSize; i++ )
+   {
+      const IndexType globalIdx = this->segments.getGlobalIndex( row, i );
+      TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" );
+      const IndexType col = this->columnIndexes.getElement( globalIdx );
+      if( col == column )
+         return this->values.getElement( globalIdx );
+   }
+   return 0.0;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getRowFast( const IndexType row,
+            IndexType* columns,
+            RealType* values ) const
+{
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+   template< typename Vector >
+__cuda_callable__
+typename Vector::RealType
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+rowVectorProduct( const IndexType row,
+                  const Vector& vector ) const
+{
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+template< typename InVector,
+       typename OutVector >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+vectorProduct( const InVector& inVector,
+               OutVector& outVector,
+               const RealType& matrixMultiplicator,
+               const RealType& inVectorAddition ) const
+{
+   const auto inVectorView = inVector.getConstView();
+   auto outVectorView = outVector.getView();
+   const auto valuesView = this->values.getConstView();
+   const auto columnIndexesView = this->columnIndexes.getConstView();
+   const IndexType paddingIndex = this->getPaddingIndex();
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset ) -> RealType {
+      const IndexType column = columnIndexesView[ offset ];
+      if( column == paddingIndex )
+         return 0.0;
+      return valuesView[ offset ] * inVectorView[ column ];
+   };
+   auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+      sum += value;
+   };
+   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      outVectorView[ row ] = value;
+   };
+   this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const
+{
+   const auto columns_view = this->columnIndexes.getConstView();
+   const auto values_view = this->values.getConstView();
+   const IndexType paddingIndex_ = this->getPaddingIndex();
+   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType globalIdx ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
+      IndexType columnIdx = columns_view[ globalIdx ];
+      if( columnIdx != paddingIndex_ )
+         return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
+      return zero;
+   };
+   this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+   template< typename Function >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+forRows( IndexType first, IndexType last, Function& function ) const
+{
+   const auto columns_view = this->columnIndexes.getConstView();
+   const auto values_view = this->values.getConstView();
+   const IndexType paddingIndex_ = this->getPaddingIndex();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool {
+      function( rowIdx, localIdx, globalIdx );
+      return true;
+   };
+   this->segments.forSegments( first, last, f );
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+   template< typename Function >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+forAllRows( Function& function ) const
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
+/*template< typename Real,
+          template< typename, typename > class SegmentsView,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+template< typename Real2, template< typename, typename > class Segments2, typename Index2, typename RealAllocator2, typename IndexAllocator2 >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+addMatrix( const SparseMatrixView< Real2, Segments2, Device, Index2, RealAllocator2, IndexAllocator2 >& matrix,
+           const RealType& matrixMultiplicator,
+           const RealType& thisMatrixMultiplicator )
+{
+
+}
+
+template< typename Real,
+          template< typename, typename > class SegmentsView,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+template< typename Real2, typename Index2 >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getTransposition( const SparseMatrixView< Real2, Device, Index2 >& matrix,
+                  const RealType& matrixMultiplicator )
+{
+
+}*/
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+template< typename Vector1, typename Vector2 >
+bool
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+performSORIteration( const Vector1& b,
+                     const IndexType row,
+                     Vector2& x,
+                     const RealType& omega ) const
+{
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+save( File& file ) const
+{
+   Matrix< RealType, DeviceType, IndexType >::save( file );
+   file << this->columnIndexes;
+   this->segments.save( file );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+save( const String& fileName ) const
+{
+   Object::save( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+print( std::ostream& str ) const
+{
+   for( IndexType row = 0; row < this->getRows(); row++ )
+   {
+      str <<"Row: " << row << " -> ";
+      const IndexType rowLength = this->segments.getSegmentSize( row );
+      for( IndexType i = 0; i < rowLength; i++ )
+      {
+         const IndexType globalIdx = this->segments.getGlobalIndex( row, i );
+         const IndexType column = this->columnIndexes.getElement( globalIdx );
+         if( column == this->getPaddingIndex() )
+            break;
+         str << " Col:" << column << "->" << this->values.getElement( globalIdx ) << "\t";
+      }
+      str << std::endl;
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+Index
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getPaddingIndex() const
+{
+   return -1;
+}
+
+   } //namespace Matrices
+} // namespace  TNL
-- 
GitLab


From 0f8eb296551dbe58c43132365650beb85fe97897 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sat, 28 Dec 2019 17:12:54 +0100
Subject: [PATCH 042/179] ViewType and ConstViewType added to Matrix(View) and
 SparseMatrix(View).

---
 src/TNL/Containers/Segments/CSR.h             |  2 ++
 src/TNL/Containers/Segments/CSRView.h         |  2 ++
 src/TNL/Containers/Segments/Ellpack.h         |  2 ++
 src/TNL/Containers/Segments/EllpackView.h     |  2 ++
 src/TNL/Containers/Segments/SlicedEllpack.h   |  2 ++
 .../Containers/Segments/SlicedEllpackView.h   |  2 ++
 src/TNL/Matrices/MatrixView.h                 | 20 +++++++----
 src/TNL/Matrices/MatrixView.hpp               | 22 ++++++++++++
 src/TNL/Matrices/SparseMatrix.h               |  8 +++++
 src/TNL/Matrices/SparseMatrix.hpp             | 36 +++++++++++++++++++
 src/TNL/Matrices/SparseMatrixView.h           |  9 +++++
 src/TNL/Matrices/SparseMatrixView.hpp         | 36 ++++++++++++++++++-
 12 files changed, 136 insertions(+), 7 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h
index add07f1df..f14060559 100644
--- a/src/TNL/Containers/Segments/CSR.h
+++ b/src/TNL/Containers/Segments/CSR.h
@@ -30,6 +30,8 @@ class CSR
       using IndexType = Index;
       using OffsetsHolder = Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type, IndexAllocator >;
       using SegmentsSizes = OffsetsHolder;
+      template< typename Device_, typename Index_ >
+      using ViewTemplate = CSRView< Device_, Index_ >;
       using ViewType = CSRView< Device, Index >;
       using ConstViewType = CSRView< Device, std::add_const_t< Index > >;
 
diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h
index 2f8957970..4917df9e8 100644
--- a/src/TNL/Containers/Segments/CSRView.h
+++ b/src/TNL/Containers/Segments/CSRView.h
@@ -29,6 +29,8 @@ class CSRView
       using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, IndexType >;
       using ConstOffsetsView = typename Containers::Vector< IndexType, DeviceType, IndexType >::ConstViewType;
       using ViewType = CSRView;
+      template< typename Device_, typename Index_ >
+      using ViewTemplate = CSRView< Device_, Index_ >;
       using ConstViewType = CSRView< Device, std::add_const_t< Index > >;
 
       __cuda_callable__
diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h
index b9b3e63c1..8cb430b6a 100644
--- a/src/TNL/Containers/Segments/Ellpack.h
+++ b/src/TNL/Containers/Segments/Ellpack.h
@@ -32,6 +32,8 @@ class Ellpack
       static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
       using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
       using SegmentsSizes = OffsetsHolder;
+      template< typename Device_, typename Index_ >
+      using ViewTemplate = EllpackView< Device_, Index_ >;
       using ViewType = EllpackView< Device, Index, RowMajorOrder, Alignment >;
       //using ConstViewType = EllpackView< Device, std::add_const_t< Index >, RowMajorOrder, Alignment >;
 
diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h
index adbfee629..6c6926be9 100644
--- a/src/TNL/Containers/Segments/EllpackView.h
+++ b/src/TNL/Containers/Segments/EllpackView.h
@@ -33,6 +33,8 @@ class EllpackView
       static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
       using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
       using SegmentsSizes = OffsetsHolder;
+      template< typename Device_, typename Index_ >
+      using ViewTemplate = EllpackView< Device_, Index_ >;
       using ViewType = EllpackView;
       //using ConstViewType = EllpackView< Device, std::add_const_t< Index > >;
 
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h
index 9c2e7157f..946c9b642 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.h
+++ b/src/TNL/Containers/Segments/SlicedEllpack.h
@@ -32,6 +32,8 @@ class SlicedEllpack
       static constexpr int getSliceSize() { return SliceSize; }
       static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
       using ViewType = SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >;
+      template< typename Device_, typename Index_ >
+      using ViewTemplate = SlicedEllpackView< Device_, Index_ >;
       using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >;
 
       SlicedEllpack();
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h
index 275baacf5..adcf9ef5a 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.h
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.h
@@ -31,6 +31,8 @@ class SlicedEllpackView
       using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, typename std::remove_const < IndexType >::type >;
       static constexpr int getSliceSize() { return SliceSize; }
       static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
+      template< typename Device_, typename Index_ >
+      using ViewTemplate = SlicedEllpackView< Device_, Index_ >;
       using ViewType = SlicedEllpackView;
       using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index > >;
 
diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h
index a2fa975cf..80fa28acf 100644
--- a/src/TNL/Matrices/MatrixView.h
+++ b/src/TNL/Matrices/MatrixView.h
@@ -29,12 +29,14 @@ class MatrixView : public Object
 {
 public:
    using RealType = Real;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-   typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
-   typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
-   typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
-   typedef Containers::VectorView< RealType, DeviceType, IndexType > ValuesView;
+   using DeviceType = Device;
+   using IndexType = Index;
+   using CompressedRowLengthsVector = Containers::Vector< IndexType, DeviceType, IndexType >;
+   using CompressedRowLengthsVectorView = Containers::VectorView< IndexType, DeviceType, IndexType >;
+   using ConstCompressedRowLengthsVectorView = typename CompressedRowLengthsVectorView::ConstViewType;
+   using ValuesView = Containers::VectorView< RealType, DeviceType, IndexType >;
+   using ViewType = MatrixView< typename std::remove_const< Real >::type, Device, Index >;
+   using ConstViewType = MatrixView< typename std::add_const< Real >::type, Device, Index >;
 
    __cuda_callable__
    MatrixView();
@@ -47,6 +49,12 @@ public:
    __cuda_callable__
    MatrixView( const MatrixView& view ) = default;
 
+   __cuda_callable__
+   ViewType getView();
+
+   __cuda_callable__
+   ConstViewType getConstView() const;
+
    virtual IndexType getRowLength( const IndexType row ) const = 0;
 
    // TODO: implementation is not parallel
diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp
index bd3d9beae..55ebc3d67 100644
--- a/src/TNL/Matrices/MatrixView.hpp
+++ b/src/TNL/Matrices/MatrixView.hpp
@@ -42,6 +42,28 @@ MatrixView( const IndexType rows_,
 {
 }
 
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+auto
+MatrixView< Real, Device, Index >::
+getView() ->ViewType
+{
+   return ViewType( rows, columns, values.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+auto
+MatrixView< Real, Device, Index >::
+getConstView() const -> ConstViewType
+{
+   return ConstViewType( rows, columns, values.getConstView() );
+}
+
 template< typename Real,
           typename Device,
           typename Index >
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 5f02e9fde..558cbb5b1 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -34,6 +34,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       template< typename Device_, typename Index_, typename IndexAllocator_ >
       using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >;
       using SegmentsType = Segments< Device, Index, IndexAllocator >;
+      template< typename Device_, typename Index_ >
+      using SegmentsViewTemplate = typename SegmentsType::ViewTemplate< Device_, Index >;
       using DeviceType = Device;
       using IndexType = Index;
       using RealAllocatorType = RealAllocator;
@@ -43,6 +45,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
       using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector;
       using ColumnsVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
+      using ViewType = SparseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >;
+      using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
 
       // TODO: remove this - it is here only for compatibility with original matrix implementation
       typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
@@ -63,6 +67,10 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
                     const RealAllocatorType& realAllocator = RealAllocatorType(),
                     const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
 
+      ViewType getView();
+
+      ConstViewType getConstView() const;
+
       static String getSerializationType();
 
       virtual String getSerializationTypeVirtual() const;
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 08eae92b4..8af68bd4d 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -73,6 +73,42 @@ SparseMatrix( const IndexType rows,
 {
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+auto
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+getView() -> ViewType
+{
+   return ViewType( this->getRows(), 
+                    this->getColumns(),
+                    this->getValues().getView(),
+                    this->getColumnsIndexes().getView(),
+                    this->segments.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+auto
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+getConstView() const -> ConstViewType
+{
+   return ConstViewType( this->getRows(),
+                         this->getColumns(),
+                         this->getValues().getConstView(),
+                         this->getColumnsIndexes().getConstView(),
+                         this->segments.getConstView() );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h
index b40d9c0c2..847c21dd5 100644
--- a/src/TNL/Matrices/SparseMatrixView.h
+++ b/src/TNL/Matrices/SparseMatrixView.h
@@ -37,6 +37,9 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
       using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
       using ValuesViewType = typename MatrixView< Real, Device, Index >::ValuesView;
       using ColumnsViewType = Containers::VectorView< IndexType, DeviceType, IndexType >;
+      using ViewType = SparseMatrixView< typename std::remove_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
+      using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
+
 
       // TODO: remove this - it is here only for compatibility with original matrix implementation
       typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
@@ -61,6 +64,12 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
       //__cuda_callable__
       //SparseMatrixView( const SparseMatrixView&& m ) = default;
 
+      __cuda_callable__
+      ViewType getView();
+
+      __cuda_callable__
+      ConstViewType getConstView() const;
+
       static String getSerializationType();
 
       virtual String getSerializationTypeVirtual() const;
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index 0c49cd58d..ffcba43dc 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -41,7 +41,41 @@ SparseMatrixView( const IndexType rows,
                   ColumnsViewType& columnIndexes,
                   SegmentsViewType& segments )
  : MatrixView< Real, Device, Index >( rows, columns, values ), columnIndexes( columnIndexes ), segments( segments )
-{  
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+auto
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getView() -> ViewType
+{
+   return ViewType( this->getRows(), 
+                    this->getColumns(),
+                    this->getValues().getView(),
+                    this->getColumnsIndexes().getView(),
+                    this->segments.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+auto
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getConstView() const -> ConstViewType
+{
+   return ConstViewType( this->getRows(),
+                         this->getColumns(),
+                         this->getValues().getConstView(),
+                         this->getColumnsIndexes().getConstView(),
+                         this->segments.getConstView() );
 }
 
 template< typename Real,
-- 
GitLab


From 8a083c49d0c3a8b6499bf53b4229e9445f5f116b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sat, 28 Dec 2019 22:49:57 +0100
Subject: [PATCH 043/179] Added SparseMatrixRowView.

---
 src/TNL/Containers/Segments/CSR.h             |  5 +
 src/TNL/Containers/Segments/CSR.hpp           | 11 +++
 src/TNL/Containers/Segments/CSRSegmentView.h  | 47 ++++++++++
 src/TNL/Containers/Segments/CSRView.h         |  5 +
 src/TNL/Containers/Segments/CSRView.hpp       | 10 ++
 src/TNL/Containers/Segments/Ellpack.h         |  5 +
 src/TNL/Containers/Segments/Ellpack.hpp       | 16 ++++
 .../Containers/Segments/EllpackSegmentView.h  | 49 ++++++++++
 src/TNL/Containers/Segments/EllpackView.h     |  5 +
 src/TNL/Containers/Segments/EllpackView.hpp   | 15 +++
 src/TNL/Containers/Segments/SlicedEllpack.h   |  5 +
 src/TNL/Containers/Segments/SlicedEllpack.hpp | 21 +++++
 .../Containers/Segments/SlicedEllpackView.h   |  6 +-
 .../Containers/Segments/SlicedEllpackView.hpp | 20 ++++
 src/TNL/Matrices/SparseMatrix.h               | 22 ++++-
 src/TNL/Matrices/SparseMatrix.hpp             | 30 ++++++
 src/TNL/Matrices/SparseMatrixRowView.h        | 64 +++++++++++++
 src/TNL/Matrices/SparseMatrixRowView.hpp      | 94 +++++++++++++++++++
 src/TNL/Matrices/SparseMatrixView.h           | 22 +++--
 src/TNL/Matrices/SparseMatrixView.hpp         | 26 +++++
 20 files changed, 466 insertions(+), 12 deletions(-)
 create mode 100644 src/TNL/Containers/Segments/CSRSegmentView.h
 create mode 100644 src/TNL/Containers/Segments/EllpackSegmentView.h
 create mode 100644 src/TNL/Matrices/SparseMatrixRowView.h
 create mode 100644 src/TNL/Matrices/SparseMatrixRowView.hpp

diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h
index f14060559..ddf56b67d 100644
--- a/src/TNL/Containers/Segments/CSR.h
+++ b/src/TNL/Containers/Segments/CSR.h
@@ -14,6 +14,7 @@
 
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/Segments/CSRView.h>
+#include <TNL/Containers/Segments/CSRSegmentView.h>
 
 namespace TNL {
    namespace Containers {
@@ -34,6 +35,7 @@ class CSR
       using ViewTemplate = CSRView< Device_, Index_ >;
       using ViewType = CSRView< Device, Index >;
       using ConstViewType = CSRView< Device, std::add_const_t< Index > >;
+      using SegmentView = CSRSegmentView< IndexType >;
 
       CSR();
 
@@ -83,6 +85,9 @@ class CSR
       __cuda_callable__
       void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
 
+      __cuda_callable__
+      SegmentView getSegmentView( const IndexType segmentIdx ) const;
+
       /***
        * \brief Go over all segments and for each segment element call
        * function 'f' with arguments 'args'. The return type of 'f' is bool.
diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index 61720869c..16e8a7763 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -158,6 +158,17 @@ getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localI
 {
 }
 
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+__cuda_callable__
+auto
+CSR< Device, Index, IndexAllocator >::
+getSegmentView( const IndexType segmentIdx ) const -> SegmentView
+{
+   return SegmentView( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] );
+}
+
 template< typename Device,
           typename Index,
           typename IndexAllocator >
diff --git a/src/TNL/Containers/Segments/CSRSegmentView.h b/src/TNL/Containers/Segments/CSRSegmentView.h
new file mode 100644
index 000000000..3ab5ef9d2
--- /dev/null
+++ b/src/TNL/Containers/Segments/CSRSegmentView.h
@@ -0,0 +1,47 @@
+/***************************************************************************
+                          CSRSegmentView.h -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Index >
+class CSRSegmentView
+{
+   public:
+
+      using IndexType = Index;
+
+      __cuda_callable__
+      CSRSegmentView( const IndexType offset, const IndexType size )
+      : segmentOffset( offset ), segmentSize( size ){};
+
+      __cuda_callable__
+      IndexType getSize() const
+      {
+         return this->segmentSize;
+      };
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const IndexType localIndex ) const
+      {
+         TNL_ASSERT_LT( localIndex, segmentSize, "Local index exceeds segment bounds." );
+         return segmentOffset + localIndex;
+      };
+
+      protected:
+
+         IndexType segmentOffset, segmentSize;
+};
+      } //namespace Segments
+   } //namespace Containers
+} //namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h
index 4917df9e8..3af5798f7 100644
--- a/src/TNL/Containers/Segments/CSRView.h
+++ b/src/TNL/Containers/Segments/CSRView.h
@@ -13,6 +13,7 @@
 #include <type_traits>
 
 #include <TNL/Containers/Vector.h>
+#include <TNL/Containers/Segments/CSRSegmentView.h>
 
 namespace TNL {
    namespace Containers {
@@ -32,6 +33,7 @@ class CSRView
       template< typename Device_, typename Index_ >
       using ViewTemplate = CSRView< Device_, Index_ >;
       using ConstViewType = CSRView< Device, std::add_const_t< Index > >;
+      using SegmentView = CSRSegmentView< IndexType >;
 
       __cuda_callable__
       CSRView();
@@ -82,6 +84,9 @@ class CSRView
       __cuda_callable__
       void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
 
+      __cuda_callable__
+      SegmentView getSegmentView( const IndexType segmentIdx ) const;
+
       /***
        * \brief Go over all segments and for each segment element call
        * function 'f' with arguments 'args'. The return type of 'f' is bool.
diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
index dd4c434ba..0135c8c68 100644
--- a/src/TNL/Containers/Segments/CSRView.hpp
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -149,6 +149,16 @@ getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localI
 {
 }
 
+template< typename Device,
+          typename Index >
+__cuda_callable__
+auto
+CSRView< Device, Index >::
+getSegmentView( const IndexType segmentIdx ) const -> SegmentView
+{
+   return SegmentView( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] );
+}
+
 template< typename Device,
           typename Index >
    template< typename Function, typename... Args >
diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h
index 8cb430b6a..0ecae8e7d 100644
--- a/src/TNL/Containers/Segments/Ellpack.h
+++ b/src/TNL/Containers/Segments/Ellpack.h
@@ -12,6 +12,7 @@
 
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/Segments/EllpackView.h>
+#include <TNL/Containers/Segments/EllpackSegmentView.h>
 
 namespace TNL {
    namespace Containers {
@@ -36,6 +37,7 @@ class Ellpack
       using ViewTemplate = EllpackView< Device_, Index_ >;
       using ViewType = EllpackView< Device, Index, RowMajorOrder, Alignment >;
       //using ConstViewType = EllpackView< Device, std::add_const_t< Index >, RowMajorOrder, Alignment >;
+      using SegmentView = EllpackSegmentView< IndexType >;
 
 
       Ellpack();
@@ -80,6 +82,9 @@ class Ellpack
       __cuda_callable__
       void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
 
+      __cuda_callable__
+      SegmentView getSegmentView( const IndexType segmentIdx ) const;
+
       /***
        * \brief Go over all segments and for each segment element call
        * function 'f' with arguments 'args'. The return type of 'f' is bool.
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index 97d30d314..762d314dd 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -216,6 +216,22 @@ getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localI
 {
 }
 
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+auto
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+getSegmentView( const IndexType segmentIdx ) const -> SegmentView
+{
+   if( RowMajorOrder )
+      return SegmentView( segmentIdx * this->segmentSize, this->segmentSize, 1 );
+   else
+      return SegmentView( segmentIdx, this->segmentSize, this->alignedSize );
+}
+
 template< typename Device,
           typename Index,
           typename IndexAllocator,
diff --git a/src/TNL/Containers/Segments/EllpackSegmentView.h b/src/TNL/Containers/Segments/EllpackSegmentView.h
new file mode 100644
index 000000000..7a1638e3f
--- /dev/null
+++ b/src/TNL/Containers/Segments/EllpackSegmentView.h
@@ -0,0 +1,49 @@
+/***************************************************************************
+                          EllpackSegmentView.h -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Index >
+class EllpackSegmentView
+{
+   public:
+
+      using IndexType = Index;
+
+      __cuda_callable__
+      EllpackSegmentView( const IndexType offset,
+                          const IndexType size,
+                          const IndexType step )
+      : segmentOffset( offset ), segmentSize( size ), step( step ){};
+
+      __cuda_callable__
+      IndexType getSize() const
+      {
+         return this->segmentSize;
+      };
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const IndexType localIndex ) const
+      {
+         TNL_ASSERT_LT( localIndex, segmentSize, "Local index exceeds segment bounds." );
+         return segmentOffset + localIndex * step;
+      };
+
+      protected:
+         
+         IndexType segmentOffset, segmentSize, step;
+};
+      } //namespace Segments
+   } //namespace Containers
+} //namespace TNL
diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h
index 6c6926be9..185321adb 100644
--- a/src/TNL/Containers/Segments/EllpackView.h
+++ b/src/TNL/Containers/Segments/EllpackView.h
@@ -13,6 +13,7 @@
 #include <type_traits>
 
 #include <TNL/Containers/Vector.h>
+#include <TNL/Containers/Segments/EllpackSegmentView.h>
 
 
 namespace TNL {
@@ -37,6 +38,7 @@ class EllpackView
       using ViewTemplate = EllpackView< Device_, Index_ >;
       using ViewType = EllpackView;
       //using ConstViewType = EllpackView< Device, std::add_const_t< Index > >;
+      using SegmentView = EllpackSegmentView< IndexType >;
 
       __cuda_callable__
       EllpackView();
@@ -75,6 +77,9 @@ class EllpackView
       __cuda_callable__
       void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
 
+      __cuda_callable__
+      SegmentView getSegmentView( const IndexType segmentIdx ) const;
+
       /***
        * \brief Go over all segments and for each segment element call
        * function 'f' with arguments 'args'. The return type of 'f' is bool.
diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp
index d124633ff..914d30a2e 100644
--- a/src/TNL/Containers/Segments/EllpackView.hpp
+++ b/src/TNL/Containers/Segments/EllpackView.hpp
@@ -160,6 +160,21 @@ getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localI
 {
 }
 
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+auto
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getSegmentView( const IndexType segmentIdx ) const -> SegmentView
+{
+   if( RowMajorOrder )
+      return SegmentView( segmentIdx * this->segmentSize, this->segmentSize, 1 );
+   else
+      return SegmentView( segmentIdx, this->segmentSize, this->alignedSize );
+}
+
 template< typename Device,
           typename Index,
           bool RowMajorOrder,
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h
index 946c9b642..8c01e8a28 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.h
+++ b/src/TNL/Containers/Segments/SlicedEllpack.h
@@ -12,6 +12,7 @@
 
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/Segments/SlicedEllpackView.h>
+#include <TNL/Containers/Segments/EllpackSegmentView.h>
 
 namespace TNL {
    namespace Containers {
@@ -35,6 +36,7 @@ class SlicedEllpack
       template< typename Device_, typename Index_ >
       using ViewTemplate = SlicedEllpackView< Device_, Index_ >;
       using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >;
+      using SegmentView = EllpackSegmentView< IndexType >;
 
       SlicedEllpack();
 
@@ -76,6 +78,9 @@ class SlicedEllpack
       __cuda_callable__
       void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
 
+      __cuda_callable__
+      SegmentView getSegmentView( const IndexType segmentIdx ) const;
+
       /***
        * \brief Go over all segments and for each segment element call
        * function 'f' with arguments 'args'. The return type of 'f' is bool.
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index c9c1d8560..1f6479704 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -241,6 +241,27 @@ getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localI
 {
 }
 
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+auto
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getSegmentView( const IndexType segmentIdx ) const -> SegmentView
+{
+   const IndexType sliceIdx = segmentIdx / SliceSize;
+   const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+   const IndexType& sliceOffset = this->sliceOffsets[ sliceIdx ];
+   const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ];
+
+   if( RowMajorOrder )
+      return SegmentView( sliceOffset, segmentSize, 1 );
+   else
+      return SegmentView( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize );
+}
+
 template< typename Device,
           typename Index,
           typename IndexAllocator,
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h
index adcf9ef5a..890814b81 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.h
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.h
@@ -13,6 +13,7 @@
 #include <type_traits>
 
 #include <TNL/Containers/Vector.h>
+#include <TNL/Containers/Segments/EllpackSegmentView.h>
 
 namespace TNL {
    namespace Containers {
@@ -35,6 +36,7 @@ class SlicedEllpackView
       using ViewTemplate = SlicedEllpackView< Device_, Index_ >;
       using ViewType = SlicedEllpackView;
       using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index > >;
+      using SegmentView = EllpackSegmentView< IndexType >;
 
       __cuda_callable__
       SlicedEllpackView();
@@ -68,7 +70,6 @@ class SlicedEllpackView
       __cuda_callable__
       IndexType getSize() const;
 
-
       __cuda_callable__
       IndexType getStorageSize() const;
 
@@ -78,6 +79,9 @@ class SlicedEllpackView
       __cuda_callable__
       void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
 
+      __cuda_callable__
+      SegmentView getSegmentView( const IndexType segmentIdx ) const;
+
       /***
        * \brief Go over all segments and for each segment element call
        * function 'f' with arguments 'args'. The return type of 'f' is bool.
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
index 66cfce195..45e33b236 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
@@ -196,6 +196,26 @@ getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localI
 {
 }
 
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+auto
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSegmentView( const IndexType segmentIdx ) const -> SegmentView
+{
+   const IndexType sliceIdx = segmentIdx / SliceSize;
+   const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+   const IndexType& sliceOffset = this->sliceOffsets[ sliceIdx ];
+   const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ];
+
+   if( RowMajorOrder )
+      return SegmentView( sliceOffset, segmentSize, 1 );
+   else
+      return SegmentView( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize );
+}
+
 template< typename Device,
           typename Index,
           bool RowMajorOrder,
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 558cbb5b1..46c02dfb0 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -15,6 +15,7 @@
 #include <TNL/Allocators/Default.h>
 #include <TNL/Containers/Segments/CSR.h>
 #include <TNL/Matrices/SparseMatrixView.h>
+#include <TNL/Matrices/SparseMatrixRowView.h>
 
 namespace TNL {
 namespace Matrices {
@@ -36,6 +37,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       using SegmentsType = Segments< Device, Index, IndexAllocator >;
       template< typename Device_, typename Index_ >
       using SegmentsViewTemplate = typename SegmentsType::ViewTemplate< Device_, Index >;
+      using SegmentViewType = typename SegmentsType::ViewType;
       using DeviceType = Device;
       using IndexType = Index;
       using RealAllocatorType = RealAllocator;
@@ -47,6 +49,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       using ColumnsVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
       using ViewType = SparseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >;
       using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
+      using RowView = SparseMatrixRowView< RealType, SegmentViewType >;
 
       // TODO: remove this - it is here only for compatibility with original matrix implementation
       typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
@@ -104,6 +107,12 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       void reset();
 
       __cuda_callable__
+      const RowView getRow( const IndexType& rowIdx ) const;
+
+      __cuda_callable__
+      RowView getRow( const IndexType& rowIdx );
+
+      [[deprecated("")]] __cuda_callable__
       bool setElementFast( const IndexType row,
                            const IndexType column,
                            const RealType& value );
@@ -112,37 +121,40 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
                        const IndexType column,
                        const RealType& value );
 
-      __cuda_callable__
+      [[deprecated("")]] __cuda_callable__
       bool addElementFast( const IndexType row,
                            const IndexType column,
                            const RealType& value,
                            const RealType& thisElementMultiplicator = 1.0 );
 
+      [[deprecated("")]]
       bool addElement( const IndexType row,
                        const IndexType column,
                        const RealType& value,
                        const RealType& thisElementMultiplicator = 1.0 );
 
 
-      __cuda_callable__
+      [[deprecated("")]] __cuda_callable__
       bool setRowFast( const IndexType row,
                        const IndexType* columnIndexes,
                        const RealType* values,
                        const IndexType elements );
 
+      [[deprecated("")]] 
       bool setRow( const IndexType row,
                    const IndexType* columnIndexes,
                    const RealType* values,
                    const IndexType elements );
 
 
-      __cuda_callable__
+      [[deprecated("")]] __cuda_callable__
       bool addRowFast( const IndexType row,
                        const IndexType* columns,
                        const RealType* values,
                        const IndexType numberOfElements,
                        const RealType& thisElementMultiplicator = 1.0 );
 
+      [[deprecated("")]] 
       bool addRow( const IndexType row,
                    const IndexType* columns,
                    const RealType* values,
@@ -150,14 +162,14 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
                    const RealType& thisElementMultiplicator = 1.0 );
 
 
-      __cuda_callable__
+      [[deprecated("")]] __cuda_callable__
       RealType getElementFast( const IndexType row,
                                const IndexType column ) const;
 
       RealType getElement( const IndexType row,
                            const IndexType column ) const;
 
-      __cuda_callable__
+      [[deprecated("")]] __cuda_callable__
       void getRowFast( const IndexType row,
                        IndexType* columns,
                        RealType* values ) const;
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 8af68bd4d..3f26c95ca 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -303,6 +303,36 @@ reset()
 
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+__cuda_callable__ auto
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+getRow( const IndexType& rowIdx ) const -> const RowView
+{
+   TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." );
+   return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+__cuda_callable__ auto
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+getRow( const IndexType& rowIdx ) -> RowView
+{
+   TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." );
+   return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/TNL/Matrices/SparseMatrixRowView.h b/src/TNL/Matrices/SparseMatrixRowView.h
new file mode 100644
index 000000000..c6d0468f9
--- /dev/null
+++ b/src/TNL/Matrices/SparseMatrixRowView.h
@@ -0,0 +1,64 @@
+/***************************************************************************
+                          SparseMatrixRowView.h -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Matrices {
+
+template< typename Real,
+          typename SegmentView >
+class SparseMatrixRowView
+{
+   public:
+
+      using RealType = Real;
+      using SegmentViewType = SegmentView;
+      using DeviceType = typename SegmentViewType::DeviceType;
+      using IndexType = typename SegmentViewType::IndexType;
+      using ValuesView = Containers::VectorView< RealType, DeviceType, IndexType >;
+      using ColumnIndexesView = Containers::VectorView< IndexType, DeviceType, IndexType >;
+
+      __cuda_callable__
+      SparseMatrixRowView( const SegmentView& segmentView,
+                           const ValuesView& values,
+                           const ColumnIndexesView& columnIndexes );
+
+      __cuda_callable__
+      IndexType getSize() const;
+
+      __cuda_callable__
+      const IndexType& getColumnIndex( const IndexType localIdx ) const;
+
+      __cuda_callable__
+      IndexType& getColumnIndex( const IndexType localIdx );
+      
+      __cuda_callable__
+      const RealType& getValue( const IndexType localIdx ) const;
+
+      __cuda_callable__
+      RealType& getValue( const IndexType localIdx );
+
+      __cuda_callable__
+      void setElement( const IndexType localIdx,
+                       const IndexType column,
+                       const RealType& value );
+   protected:
+
+      SegmentView segmentView;
+
+      ValuesView values;
+
+      ColumnIndexesView columnIndexes;
+};
+   } // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/SparseMatrixRowView.hpp>
diff --git a/src/TNL/Matrices/SparseMatrixRowView.hpp b/src/TNL/Matrices/SparseMatrixRowView.hpp
new file mode 100644
index 000000000..364bb8e2e
--- /dev/null
+++ b/src/TNL/Matrices/SparseMatrixRowView.hpp
@@ -0,0 +1,94 @@
+/***************************************************************************
+                          SparseMatrixRowView.hpp -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/SparseMatrixRowView.h>
+
+namespace TNL {
+   namespace Matrices {
+
+template< typename Real,
+          typename SegmentView >
+__cuda_callable__
+SparseMatrixRowView< Real, SegmentView >::
+SparseMatrixRowView( const SegmentView& segmentView,
+                     const ValuesView& values,
+                     const ColumnIndexesView& columnIndexes )
+ : segmentView( segmentView ), values( values ), columnIndexes( columnIndexes )
+{
+}
+
+template< typename Real,
+          typename SegmentView >
+__cuda_callable__ auto
+SparseMatrixRowView< Real, SegmentView >::
+getSize() const -> IndexType
+{
+   return segmentView.getSize();
+}
+
+template< typename Real,
+          typename SegmentView >
+__cuda_callable__ auto
+SparseMatrixRowView< Real, SegmentView >::
+getColumnIndex( const IndexType localIdx ) const -> const IndexType&
+{
+   TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
+   return columnIndexes[ segmentView.getGlobalIndex( localIdx ) ];
+}
+
+template< typename Real,
+          typename SegmentView >
+__cuda_callable__ auto
+SparseMatrixRowView< Real, SegmentView >::
+getColumnIndex( const IndexType localIdx ) -> IndexType&
+{
+   TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
+   return columnIndexes[ segmentView.getGlobalIndex( localIdx ) ];
+}
+
+template< typename Real,
+          typename SegmentView >
+__cuda_callable__ auto
+SparseMatrixRowView< Real, SegmentView >::
+getValue( const IndexType localIdx ) const -> const RealType&
+{
+   TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
+   return values[ segmentView.getGlobalIndex( localIdx ) ];
+}
+
+template< typename Real,
+          typename SegmentView >
+__cuda_callable__ auto
+SparseMatrixRowView< Real, SegmentView >::
+getValue( const IndexType localIdx ) -> RealType&
+{
+   TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
+   return values[ segmentView.getGlobalIndex( localIdx ) ];
+}
+
+template< typename Real,
+          typename SegmentView >
+__cuda_callable__ void 
+SparseMatrixRowView< Real, SegmentView >::
+setElement( const IndexType localIdx,
+            const IndexType column,
+            const RealType& value )
+{
+   TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
+   const IndexType globalIdx = segmentView.getGlobalIndex( localIdx );
+   columnIndexes[ globalIdx ] = column;
+   values[ globalIdx ] = value;
+}
+
+
+   } // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h
index 847c21dd5..a674ee807 100644
--- a/src/TNL/Matrices/SparseMatrixView.h
+++ b/src/TNL/Matrices/SparseMatrixView.h
@@ -14,6 +14,7 @@
 #include <TNL/Matrices/MatrixType.h>
 #include <TNL/Allocators/Default.h>
 #include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Matrices/SparseMatrixRowView.h>
 
 namespace TNL {
 namespace Matrices {
@@ -39,7 +40,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
       using ColumnsViewType = Containers::VectorView< IndexType, DeviceType, IndexType >;
       using ViewType = SparseMatrixView< typename std::remove_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
       using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
-
+      using RowView = SparseMatrixRowView< RealType, SegmentsViewType >;
 
       // TODO: remove this - it is here only for compatibility with original matrix implementation
       typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
@@ -92,6 +93,12 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
       void reset();
 
       __cuda_callable__
+      const RowView getRow( const IndexType& rowIdx ) const;
+
+      __cuda_callable__
+      RowView getRow( const IndexType& rowIdx );
+
+      [[deprecated("")]] __cuda_callable__
       bool setElementFast( const IndexType row,
                            const IndexType column,
                            const RealType& value );
@@ -100,37 +107,40 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
                        const IndexType column,
                        const RealType& value );
 
-      __cuda_callable__
+      [[deprecated("")]] __cuda_callable__
       bool addElementFast( const IndexType row,
                            const IndexType column,
                            const RealType& value,
                            const RealType& thisElementMultiplicator = 1.0 );
 
+      [[deprecated("")]] 
       bool addElement( const IndexType row,
                        const IndexType column,
                        const RealType& value,
                        const RealType& thisElementMultiplicator = 1.0 );
 
 
-      __cuda_callable__
+      [[deprecated("")]] __cuda_callable__
       bool setRowFast( const IndexType row,
                        const IndexType* columnIndexes,
                        const RealType* values,
                        const IndexType elements );
 
+      [[deprecated("")]] 
       bool setRow( const IndexType row,
                    const IndexType* columnIndexes,
                    const RealType* values,
                    const IndexType elements );
 
 
-      __cuda_callable__
+      [[deprecated("")]] __cuda_callable__
       bool addRowFast( const IndexType row,
                        const IndexType* columns,
                        const RealType* values,
                        const IndexType numberOfElements,
                        const RealType& thisElementMultiplicator = 1.0 );
 
+      [[deprecated("")]] 
       bool addRow( const IndexType row,
                    const IndexType* columns,
                    const RealType* values,
@@ -138,14 +148,14 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
                    const RealType& thisElementMultiplicator = 1.0 );
 
 
-      __cuda_callable__
+      [[deprecated("")]] __cuda_callable__
       RealType getElementFast( const IndexType row,
                                const IndexType column ) const;
 
       RealType getElement( const IndexType row,
                            const IndexType column ) const;
 
-      __cuda_callable__
+      [[deprecated("")]] __cuda_callable__
       void getRowFast( const IndexType row,
                        IndexType* columns,
                        RealType* values ) const;
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index ffcba43dc..3f9743124 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -211,6 +211,32 @@ reset()
 
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__ auto
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getRow( const IndexType& rowIdx ) const -> const RowView
+{
+   TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." );
+   return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__ auto
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getRow( const IndexType& rowIdx ) -> RowView
+{
+   TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." );
+   return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
-- 
GitLab


From 02c67f6e1a3cfbd96c3714d8d013c0bc0e4d9c0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sun, 29 Dec 2019 18:09:32 +0100
Subject: [PATCH 044/179] Debugging SparseMatrixRowView.

---
 src/TNL/Containers/Segments/CSR.h             |   6 +-
 src/TNL/Containers/Segments/CSR.hpp           |  12 +-
 src/TNL/Containers/Segments/CSRView.h         |   6 +-
 src/TNL/Containers/Segments/CSRView.hpp       |  13 +-
 src/TNL/Containers/Segments/Ellpack.h         |   6 +-
 src/TNL/Containers/Segments/Ellpack.hpp       |  14 +-
 src/TNL/Containers/Segments/EllpackView.h     |   6 +-
 src/TNL/Containers/Segments/EllpackView.hpp   |  17 +-
 src/TNL/Containers/Segments/SlicedEllpack.h   |   6 +-
 src/TNL/Containers/Segments/SlicedEllpack.hpp |  16 +-
 .../Containers/Segments/SlicedEllpackView.h   |   6 +-
 .../Containers/Segments/SlicedEllpackView.hpp |  17 +-
 src/TNL/Matrices/Matrix.h                     |   4 +-
 src/TNL/Matrices/Matrix.hpp                   |   4 +-
 src/TNL/Matrices/MatrixView.h                 |  10 +-
 src/TNL/Matrices/MatrixView.hpp               |   4 +-
 src/TNL/Matrices/SparseMatrix.h               |  12 +-
 src/TNL/Matrices/SparseMatrix.hpp             |   6 +-
 src/TNL/Matrices/SparseMatrixRowView.h        |  24 +-
 src/TNL/Matrices/SparseMatrixRowView.hpp      |  55 +++--
 src/TNL/Matrices/SparseMatrixView.h           |  13 +-
 src/TNL/Matrices/SparseMatrixView.hpp         |  12 +-
 src/UnitTests/Matrices/SparseMatrixTest.hpp   | 227 ++++++++++++++++++
 .../Matrices/SparseMatrixTest_CSR_segments.h  |   8 +
 .../SparseMatrixTest_Ellpack_segments.h       |   7 +
 .../SparseMatrixTest_SlicedEllpack_segments.h |   7 +
 26 files changed, 426 insertions(+), 92 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h
index ddf56b67d..df7cb5686 100644
--- a/src/TNL/Containers/Segments/CSR.h
+++ b/src/TNL/Containers/Segments/CSR.h
@@ -35,7 +35,7 @@ class CSR
       using ViewTemplate = CSRView< Device_, Index_ >;
       using ViewType = CSRView< Device, Index >;
       using ConstViewType = CSRView< Device, std::add_const_t< Index > >;
-      using SegmentView = CSRSegmentView< IndexType >;
+      using SegmentViewType = CSRSegmentView< IndexType >;
 
       CSR();
 
@@ -45,6 +45,8 @@ class CSR
 
       CSR( const CSR&& segments );
 
+      static String getSerializationType();
+
       /**
        * \brief Set sizes of particular segments.
        */
@@ -86,7 +88,7 @@ class CSR
       void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
 
       __cuda_callable__
-      SegmentView getSegmentView( const IndexType segmentIdx ) const;
+      SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
 
       /***
        * \brief Go over all segments and for each segment element call
diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index 16e8a7763..9ab2186c3 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -54,6 +54,16 @@ CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) )
 
 }
 
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+String
+CSR< Device, Index, IndexAllocator >::
+getSerializationType()
+{
+   return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
+}
+
 template< typename Device,
           typename Index,
           typename IndexAllocator >
@@ -164,7 +174,7 @@ template< typename Device,
 __cuda_callable__
 auto
 CSR< Device, Index, IndexAllocator >::
-getSegmentView( const IndexType segmentIdx ) const -> SegmentView
+getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 {
    return SegmentView( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] );
 }
diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h
index 3af5798f7..860a35a0a 100644
--- a/src/TNL/Containers/Segments/CSRView.h
+++ b/src/TNL/Containers/Segments/CSRView.h
@@ -33,7 +33,7 @@ class CSRView
       template< typename Device_, typename Index_ >
       using ViewTemplate = CSRView< Device_, Index_ >;
       using ConstViewType = CSRView< Device, std::add_const_t< Index > >;
-      using SegmentView = CSRSegmentView< IndexType >;
+      using SegmentViewType = CSRSegmentView< IndexType >;
 
       __cuda_callable__
       CSRView();
@@ -50,6 +50,8 @@ class CSRView
       __cuda_callable__
       CSRView( const CSRView&& csr_view );
 
+      static String getSerializationType();
+
       ViewType getView();
 
       ConstViewType getConstView() const;
@@ -85,7 +87,7 @@ class CSRView
       void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
 
       __cuda_callable__
-      SegmentView getSegmentView( const IndexType segmentIdx ) const;
+      SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
 
       /***
        * \brief Go over all segments and for each segment element call
diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
index 0135c8c68..f4f59370d 100644
--- a/src/TNL/Containers/Segments/CSRView.hpp
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -64,6 +64,15 @@ CSRView( const CSRView&& csr_view )
 {
 }
 
+template< typename Device,
+          typename Index >
+String
+CSRView< Device, Index >::
+getSerializationType()
+{
+   return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
+}
+
 template< typename Device,
           typename Index >
 typename CSRView< Device, Index >::ViewType
@@ -154,9 +163,9 @@ template< typename Device,
 __cuda_callable__
 auto
 CSRView< Device, Index >::
-getSegmentView( const IndexType segmentIdx ) const -> SegmentView
+getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 {
-   return SegmentView( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] );
+   return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] );
 }
 
 template< typename Device,
diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h
index 0ecae8e7d..f73155335 100644
--- a/src/TNL/Containers/Segments/Ellpack.h
+++ b/src/TNL/Containers/Segments/Ellpack.h
@@ -37,7 +37,7 @@ class Ellpack
       using ViewTemplate = EllpackView< Device_, Index_ >;
       using ViewType = EllpackView< Device, Index, RowMajorOrder, Alignment >;
       //using ConstViewType = EllpackView< Device, std::add_const_t< Index >, RowMajorOrder, Alignment >;
-      using SegmentView = EllpackSegmentView< IndexType >;
+      using SegmentViewType = EllpackSegmentView< IndexType >;
 
 
       Ellpack();
@@ -50,6 +50,8 @@ class Ellpack
 
       Ellpack( const Ellpack&& segments );
 
+      static String getSerializationType();
+
       ViewType getView();
 
       //ConstViewType getConstView() const;
@@ -83,7 +85,7 @@ class Ellpack
       void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
 
       __cuda_callable__
-      SegmentView getSegmentView( const IndexType segmentIdx ) const;
+      SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
 
       /***
        * \brief Go over all segments and for each segment element call
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index 762d314dd..9f7702a6f 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -76,6 +76,18 @@ Ellpack( const Ellpack&& ellpack )
 {
 }
 
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+String
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+getSerializationType()
+{
+   return "Ellpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
+}
+
 template< typename Device,
           typename Index,
           typename IndexAllocator,
@@ -224,7 +236,7 @@ template< typename Device,
 __cuda_callable__
 auto
 Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
-getSegmentView( const IndexType segmentIdx ) const -> SegmentView
+getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 {
    if( RowMajorOrder )
       return SegmentView( segmentIdx * this->segmentSize, this->segmentSize, 1 );
diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h
index 185321adb..682eeeb4a 100644
--- a/src/TNL/Containers/Segments/EllpackView.h
+++ b/src/TNL/Containers/Segments/EllpackView.h
@@ -38,7 +38,7 @@ class EllpackView
       using ViewTemplate = EllpackView< Device_, Index_ >;
       using ViewType = EllpackView;
       //using ConstViewType = EllpackView< Device, std::add_const_t< Index > >;
-      using SegmentView = EllpackSegmentView< IndexType >;
+      using SegmentViewType = EllpackSegmentView< IndexType >;
 
       __cuda_callable__
       EllpackView();
@@ -52,6 +52,8 @@ class EllpackView
       __cuda_callable__
       EllpackView( const EllpackView&& ellpackView );
 
+      static String getSerializationType();
+
       ViewType getView();
 
       //ConstViewType getConstView() const;
@@ -78,7 +80,7 @@ class EllpackView
       void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
 
       __cuda_callable__
-      SegmentView getSegmentView( const IndexType segmentIdx ) const;
+      SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
 
       /***
        * \brief Go over all segments and for each segment element call
diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp
index 914d30a2e..f5dba4f3d 100644
--- a/src/TNL/Containers/Segments/EllpackView.hpp
+++ b/src/TNL/Containers/Segments/EllpackView.hpp
@@ -63,6 +63,17 @@ EllpackView( const EllpackView&& ellpack )
 {
 }
 
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+String
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getSerializationType()
+{
+   return "Ellpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
+}
+
 template< typename Device,
           typename Index,
           bool RowMajorOrder,
@@ -167,12 +178,12 @@ template< typename Device,
 __cuda_callable__
 auto
 EllpackView< Device, Index, RowMajorOrder, Alignment >::
-getSegmentView( const IndexType segmentIdx ) const -> SegmentView
+getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 {
    if( RowMajorOrder )
-      return SegmentView( segmentIdx * this->segmentSize, this->segmentSize, 1 );
+      return SegmentViewType( segmentIdx * this->segmentSize, this->segmentSize, 1 );
    else
-      return SegmentView( segmentIdx, this->segmentSize, this->alignedSize );
+      return SegmentViewType( segmentIdx, this->segmentSize, this->alignedSize );
 }
 
 template< typename Device,
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h
index 8c01e8a28..1c110b1f1 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.h
+++ b/src/TNL/Containers/Segments/SlicedEllpack.h
@@ -36,7 +36,7 @@ class SlicedEllpack
       template< typename Device_, typename Index_ >
       using ViewTemplate = SlicedEllpackView< Device_, Index_ >;
       using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >;
-      using SegmentView = EllpackSegmentView< IndexType >;
+      using SegmentViewType = EllpackSegmentView< IndexType >;
 
       SlicedEllpack();
 
@@ -46,6 +46,8 @@ class SlicedEllpack
 
       SlicedEllpack( const SlicedEllpack&& segments );
 
+      static String getSerializationType();
+
       ViewType getView();
 
       ConstViewType getConstView() const;
@@ -79,7 +81,7 @@ class SlicedEllpack
       void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
 
       __cuda_callable__
-      SegmentView getSegmentView( const IndexType segmentIdx ) const;
+      SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
 
       /***
        * \brief Go over all segments and for each segment element call
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index 1f6479704..e2aec924d 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -69,6 +69,18 @@ SlicedEllpack( const SlicedEllpack&& slicedEllpack )
 {
 }
 
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+String
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getSerializationType()
+{
+   return "SlicedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
+}
+
 template< typename Device,
           typename Index,
           typename IndexAllocator,
@@ -249,7 +261,7 @@ template< typename Device,
 __cuda_callable__
 auto
 SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
-getSegmentView( const IndexType segmentIdx ) const -> SegmentView
+getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 {
    const IndexType sliceIdx = segmentIdx / SliceSize;
    const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
@@ -257,7 +269,7 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentView
    const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ];
 
    if( RowMajorOrder )
-      return SegmentView( sliceOffset, segmentSize, 1 );
+      return SegmentView( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 );
    else
       return SegmentView( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize );
 }
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h
index 890814b81..e87c75229 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.h
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.h
@@ -36,7 +36,7 @@ class SlicedEllpackView
       using ViewTemplate = SlicedEllpackView< Device_, Index_ >;
       using ViewType = SlicedEllpackView;
       using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index > >;
-      using SegmentView = EllpackSegmentView< IndexType >;
+      using SegmentViewType = EllpackSegmentView< IndexType >;
 
       __cuda_callable__
       SlicedEllpackView();
@@ -54,6 +54,8 @@ class SlicedEllpackView
       __cuda_callable__
       SlicedEllpackView( const SlicedEllpackView&& slicedEllpackView );
 
+      static String getSerializationType();
+
       ViewType getView();
 
       ConstViewType getConstView() const;
@@ -80,7 +82,7 @@ class SlicedEllpackView
       void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
 
       __cuda_callable__
-      SegmentView getSegmentView( const IndexType segmentIdx ) const;
+      SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
 
       /***
        * \brief Go over all segments and for each segment element call
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
index 45e33b236..139a09a15 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
@@ -72,6 +72,17 @@ SlicedEllpackView( const SlicedEllpackView&& slicedEllpackView )
 {
 }
 
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+String
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSerializationType()
+{
+   return "SlicedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
+}
+
 template< typename Device,
           typename Index,
           bool RowMajorOrder,
@@ -203,7 +214,7 @@ template< typename Device,
 __cuda_callable__
 auto
 SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
-getSegmentView( const IndexType segmentIdx ) const -> SegmentView
+getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 {
    const IndexType sliceIdx = segmentIdx / SliceSize;
    const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
@@ -211,9 +222,9 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentView
    const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ];
 
    if( RowMajorOrder )
-      return SegmentView( sliceOffset, segmentSize, 1 );
+      return SegmentViewType( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 );
    else
-      return SegmentView( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize );
+      return SegmentViewType( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize );
 }
 
 template< typename Device,
diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index 96409c89b..66a686046 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -47,9 +47,9 @@ public:
            const IndexType columns,
            const RealAllocatorType& allocator = RealAllocatorType() );
 
-   ViewType getView();
+   /*ViewType getView();
 
-   ConstViewType getConstView() const;
+   ConstViewType getConstView() const;*/
 
    virtual void setDimensions( const IndexType rows,
                                const IndexType columns );
diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp
index 91b81ffcf..3a09d0088 100644
--- a/src/TNL/Matrices/Matrix.hpp
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -43,7 +43,7 @@ Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType
 {
 }
 
-template< typename Real,
+/*template< typename Real,
           typename Device,
           typename Index,
           typename RealAllocator >
@@ -63,7 +63,7 @@ Matrix< Real, Device, Index, RealAllocator >::
 getConstView() const -> ConstViewType
 {
    return ConstViewType( rows, columns, values.getConstView() );
-}
+}*/
 
 template< typename Real,
           typename Device,
diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h
index 80fa28acf..18a9fb488 100644
--- a/src/TNL/Matrices/MatrixView.h
+++ b/src/TNL/Matrices/MatrixView.h
@@ -49,11 +49,11 @@ public:
    __cuda_callable__
    MatrixView( const MatrixView& view ) = default;
 
-   __cuda_callable__
-   ViewType getView();
+   //__cuda_callable__
+   //ViewType getView();
 
-   __cuda_callable__
-   ConstViewType getConstView() const;
+   //__cuda_callable__
+   //ConstViewType getConstView() const;
 
    virtual IndexType getRowLength( const IndexType row ) const = 0;
 
@@ -65,7 +65,7 @@ public:
 
    IndexType getNumberOfMatrixElements() const;
 
-   virtual IndexType getNumberOfNonzeroMatrixElements() const = 0;
+   virtual IndexType getNumberOfNonzeroMatrixElements() const;
 
    void reset();
 
diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp
index 55ebc3d67..0473f52b8 100644
--- a/src/TNL/Matrices/MatrixView.hpp
+++ b/src/TNL/Matrices/MatrixView.hpp
@@ -42,7 +42,7 @@ MatrixView( const IndexType rows_,
 {
 }
 
-template< typename Real,
+/*template< typename Real,
           typename Device,
           typename Index >
 __cuda_callable__
@@ -62,7 +62,7 @@ MatrixView< Real, Device, Index >::
 getConstView() const -> ConstViewType
 {
    return ConstViewType( rows, columns, values.getConstView() );
-}
+}*/
 
 template< typename Real,
           typename Device,
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 46c02dfb0..8169f89f2 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -36,8 +36,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >;
       using SegmentsType = Segments< Device, Index, IndexAllocator >;
       template< typename Device_, typename Index_ >
-      using SegmentsViewTemplate = typename SegmentsType::ViewTemplate< Device_, Index >;
-      using SegmentViewType = typename SegmentsType::ViewType;
+      using SegmentsViewTemplate = typename SegmentsType::template ViewTemplate< Device_, Index >;
+      using SegmentViewType = typename SegmentsType::SegmentViewType;
       using DeviceType = Device;
       using IndexType = Index;
       using RealAllocatorType = RealAllocator;
@@ -46,10 +46,12 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >;
       using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
       using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector;
-      using ColumnsVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
+      using ValuesViewType = typename ValuesVectorType::ViewType;
+      using ColumnsIndexesVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
+      using ColumnsIndexesViewType = typename ColumnsIndexesVectorType::ViewType;
       using ViewType = SparseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >;
       using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
-      using RowView = SparseMatrixRowView< RealType, SegmentViewType >;
+      using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType >;
 
       // TODO: remove this - it is here only for compatibility with original matrix implementation
       typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
@@ -246,7 +248,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 // TODO: restore it and also in Matrix
 //   protected:
 
-      ColumnsVectorType columnIndexes;
+      ColumnsIndexesVectorType columnIndexes;
 
       SegmentsType segments;
 
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 3f26c95ca..c0dd3b9a3 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -87,7 +87,7 @@ getView() -> ViewType
    return ViewType( this->getRows(), 
                     this->getColumns(),
                     this->getValues().getView(),
-                    this->getColumnsIndexes().getView(),
+                    this->columnIndexes.getView(),
                     this->segments.getView() );
 }
 
@@ -105,7 +105,7 @@ getConstView() const -> ConstViewType
    return ConstViewType( this->getRows(),
                          this->getColumns(),
                          this->getValues().getConstView(),
-                         this->getColumnsIndexes().getConstView(),
+                         this->columnIndexes.getConstView(),
                          this->segments.getConstView() );
 }
 
@@ -299,8 +299,6 @@ SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAll
 reset()
 {
    Matrix< Real, Device, Index >::reset();
-   this->columnIndexes.reset();
-
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/SparseMatrixRowView.h b/src/TNL/Matrices/SparseMatrixRowView.h
index c6d0468f9..19445f531 100644
--- a/src/TNL/Matrices/SparseMatrixRowView.h
+++ b/src/TNL/Matrices/SparseMatrixRowView.h
@@ -13,23 +13,23 @@
 namespace TNL {
    namespace Matrices {
 
-template< typename Real,
-          typename SegmentView >
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView >
 class SparseMatrixRowView
 {
    public:
 
-      using RealType = Real;
+      using RealType = typename ValuesView::RealType;
       using SegmentViewType = SegmentView;
-      using DeviceType = typename SegmentViewType::DeviceType;
       using IndexType = typename SegmentViewType::IndexType;
-      using ValuesView = Containers::VectorView< RealType, DeviceType, IndexType >;
-      using ColumnIndexesView = Containers::VectorView< IndexType, DeviceType, IndexType >;
+      using ValuesViewType = ValuesView;
+      using ColumnsIndexesViewType = ColumnsIndexesView;
 
       __cuda_callable__
-      SparseMatrixRowView( const SegmentView& segmentView,
-                           const ValuesView& values,
-                           const ColumnIndexesView& columnIndexes );
+      SparseMatrixRowView( const SegmentViewType& segmentView,
+                           const ValuesViewType& values,
+                           const ColumnsIndexesViewType& columnIndexes );
 
       __cuda_callable__
       IndexType getSize() const;
@@ -52,11 +52,11 @@ class SparseMatrixRowView
                        const RealType& value );
    protected:
 
-      SegmentView segmentView;
+      SegmentViewType segmentView;
 
-      ValuesView values;
+      ValuesViewType values;
 
-      ColumnIndexesView columnIndexes;
+      ColumnsIndexesViewType columnIndexes;
 };
    } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/SparseMatrixRowView.hpp b/src/TNL/Matrices/SparseMatrixRowView.hpp
index 364bb8e2e..70dac874e 100644
--- a/src/TNL/Matrices/SparseMatrixRowView.hpp
+++ b/src/TNL/Matrices/SparseMatrixRowView.hpp
@@ -15,70 +15,77 @@
 namespace TNL {
    namespace Matrices {
 
-template< typename Real,
-          typename SegmentView >
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView >
 __cuda_callable__
-SparseMatrixRowView< Real, SegmentView >::
-SparseMatrixRowView( const SegmentView& segmentView,
-                     const ValuesView& values,
-                     const ColumnIndexesView& columnIndexes )
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >::
+SparseMatrixRowView( const SegmentViewType& segmentView,
+                     const ValuesViewType& values,
+                     const ColumnsIndexesViewType& columnIndexes )
  : segmentView( segmentView ), values( values ), columnIndexes( columnIndexes )
 {
 }
 
-template< typename Real,
-          typename SegmentView >
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView >
 __cuda_callable__ auto
-SparseMatrixRowView< Real, SegmentView >::
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >::
 getSize() const -> IndexType
 {
    return segmentView.getSize();
 }
 
-template< typename Real,
-          typename SegmentView >
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView >
 __cuda_callable__ auto
-SparseMatrixRowView< Real, SegmentView >::
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >::
 getColumnIndex( const IndexType localIdx ) const -> const IndexType&
 {
    TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
    return columnIndexes[ segmentView.getGlobalIndex( localIdx ) ];
 }
 
-template< typename Real,
-          typename SegmentView >
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView >
 __cuda_callable__ auto
-SparseMatrixRowView< Real, SegmentView >::
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >::
 getColumnIndex( const IndexType localIdx ) -> IndexType&
 {
    TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
    return columnIndexes[ segmentView.getGlobalIndex( localIdx ) ];
 }
 
-template< typename Real,
-          typename SegmentView >
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView >
 __cuda_callable__ auto
-SparseMatrixRowView< Real, SegmentView >::
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >::
 getValue( const IndexType localIdx ) const -> const RealType&
 {
    TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
    return values[ segmentView.getGlobalIndex( localIdx ) ];
 }
 
-template< typename Real,
-          typename SegmentView >
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView >
 __cuda_callable__ auto
-SparseMatrixRowView< Real, SegmentView >::
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >::
 getValue( const IndexType localIdx ) -> RealType&
 {
    TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
    return values[ segmentView.getGlobalIndex( localIdx ) ];
 }
 
-template< typename Real,
-          typename SegmentView >
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView >
 __cuda_callable__ void 
-SparseMatrixRowView< Real, SegmentView >::
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >::
 setElement( const IndexType localIdx,
             const IndexType column,
             const RealType& value )
diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h
index a674ee807..714692df8 100644
--- a/src/TNL/Matrices/SparseMatrixView.h
+++ b/src/TNL/Matrices/SparseMatrixView.h
@@ -32,15 +32,16 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
       template< typename Device_, typename Index_ >
       using SegmentsViewTemplate = SegmentsView< Device_, Index_ >;
       using SegmentsViewType = SegmentsView< Device, Index >;
+      using SegmentViewType = typename SegmentsViewType::SegmentViewType;
       using DeviceType = Device;
       using IndexType = Index;
       using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >;
       using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
       using ValuesViewType = typename MatrixView< Real, Device, Index >::ValuesView;
-      using ColumnsViewType = Containers::VectorView< IndexType, DeviceType, IndexType >;
+      using ColumnsIndexesViewType = Containers::VectorView< IndexType, DeviceType, IndexType >;
       using ViewType = SparseMatrixView< typename std::remove_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
       using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
-      using RowView = SparseMatrixRowView< RealType, SegmentsViewType >;
+      using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType >;
 
       // TODO: remove this - it is here only for compatibility with original matrix implementation
       typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
@@ -55,9 +56,9 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
       __cuda_callable__
       SparseMatrixView( const IndexType rows,
                         const IndexType columns,
-                        ValuesViewType& values,
-                        ColumnsViewType& columnIndexes,
-                        SegmentsViewType& segments );
+                        const ValuesViewType& values,
+                        const ColumnsIndexesViewType& columnIndexes,
+                        const SegmentsViewType& segments );
 
       __cuda_callable__
       SparseMatrixView( const SparseMatrixView& m ) = default;
@@ -204,7 +205,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
 
    protected:
 
-      ColumnsViewType columnIndexes;
+      ColumnsIndexesViewType columnIndexes;
 
       SegmentsViewType segments;
 };
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index 3f9743124..5ac494a9b 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -37,9 +37,9 @@ __cuda_callable__
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
 SparseMatrixView( const IndexType rows,
                   const IndexType columns,
-                  ValuesViewType& values,
-                  ColumnsViewType& columnIndexes,
-                  SegmentsViewType& segments )
+                  const ValuesViewType& values,
+                  const ColumnsIndexesViewType& columnIndexes,
+                  const SegmentsViewType& segments )
  : MatrixView< Real, Device, Index >( rows, columns, values ), columnIndexes( columnIndexes ), segments( segments )
 {
 }
@@ -57,7 +57,7 @@ getView() -> ViewType
    return ViewType( this->getRows(), 
                     this->getColumns(),
                     this->getValues().getView(),
-                    this->getColumnsIndexes().getView(),
+                    this->columnIndexes.getView(),
                     this->segments.getView() );
 }
 
@@ -89,7 +89,7 @@ getSerializationType()
 {
    return String( "Matrices::SparseMatrix< " ) +
              TNL::getSerializationType< RealType >() + ", " +
-             TNL::getSerializationType< SegmentsView >() + ", [any_device], " +
+             TNL::getSerializationType< SegmentsViewType >() + ", [any_device], " +
              TNL::getSerializationType< IndexType >() + ", [any_allocator] >";
 }
 
@@ -648,7 +648,7 @@ void
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
 save( File& file ) const
 {
-   Matrix< RealType, DeviceType, IndexType >::save( file );
+   MatrixView< RealType, DeviceType, IndexType >::save( file );
    file << this->columnIndexes;
    this->segments.save( file );
 }
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp
index 07a60178f..72dfc90e8 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp
@@ -11,6 +11,7 @@
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/VectorView.h>
 #include <TNL/Math.h>
+#include <TNL/Algorithms/ParallelFor.h>
 #include <iostream>
 
 // Temporary, until test_OperatorEquals doesn't work for all formats.
@@ -249,6 +250,232 @@ void test_Reset()
     EXPECT_EQ( m.getColumns(), 0 );
 }
 
+template< typename Matrix >
+void test_GetRow()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+/*
+ * Sets up the following 10x10 sparse matrix:
+ *
+ *    /  1  0  2  0  3  0  4  0  0  0  \
+ *    |  5  6  7  0  0  0  0  0  0  0  |
+ *    |  8  9 10 11 12 13 14 15  0  0  |
+ *    | 16 17  0  0  0  0  0  0  0  0  |
+ *    | 18  0  0  0  0  0  0  0  0  0  |
+ *    | 19  0  0  0  0  0  0  0  0  0  |
+ *    | 20  0  0  0  0  0  0  0  0  0  |
+ *    | 21  0  0  0  0  0  0  0  0  0  |
+ *    | 22 23 24 25 26 27 28 29 30 31  |
+ *    \ 32 33 34 35 36 37 38 39 40 41 /
+ */
+
+    const IndexType rows = 10;
+    const IndexType cols = 10;
+
+    Matrix m( rows, cols );
+
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( rows );
+    rowLengths.setElement( 0, 4 );
+    rowLengths.setElement( 1, 3 );
+    rowLengths.setElement( 2, 8 );
+    rowLengths.setElement( 3, 2 );
+    for( IndexType i = 4; i < rows - 2; i++ )
+    {
+        rowLengths.setElement( i, 1 );
+    }
+    rowLengths.setElement( 8, 10 );
+    rowLengths.setElement( 9, 10 );
+    m.setCompressedRowLengths( rowLengths );
+
+    /*RealType value = 1;
+    for( IndexType i = 0; i < 4; i++ )
+        m.setElement( 0, 2 * i, value++ );
+
+    for( IndexType i = 0; i < 3; i++ )
+        m.setElement( 1, i, value++ );
+
+    for( IndexType i = 0; i < 8; i++ )
+        m.setElement( 2, i, value++ );
+
+    for( IndexType i = 0; i < 2; i++ )
+        m.setElement( 3, i, value++ );
+
+    for( IndexType i = 4; i < 8; i++ )
+        m.setElement( i, 0, value++ );
+
+    for( IndexType j = 8; j < rows; j++)
+    {
+        for( IndexType i = 0; i < cols; i++ )
+            m.setElement( j, i, value++ );
+    }*/
+    auto matrixView = m.getView();
+    auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
+       auto row = matrixView.getRow( rowIdx );
+       RealType val;
+       switch( rowIdx )
+       {
+          case 0:
+            val = 1;
+            for( IndexType i = 0; i < 4; i++ )
+               row.setElement( i, 2 * i, val++ );
+            break;
+         case 1:
+            val = 5;
+            for( IndexType i = 0; i < 3; i++ )
+               row.setElement( i, i, val++ );
+            break;
+         case 2:
+            val = 8;
+            for( IndexType i = 0; i < 8; i++ )
+               row.setElement( i, i, val++ );
+            break;
+         case 3:
+            val = 16;
+            for( IndexType i = 0; i < 2; i++ )
+               row.setElement( i, i, val++ );
+            break;
+         case 4:
+            row.setElement( 0, 0, 18 );
+            break;
+         case 5:
+            row.setElement( 0, 0, 19 );
+            break;
+         case 6:
+            row.setElement( 0, 0, 20 );
+            break;
+         case 7:
+            row.setElement( 0, 0, 21 );
+            break;
+         case 8:
+             val = 22;
+             for( IndexType i = 0; i < rows; i++ )
+                row.setElement( i, i, val++ );
+             break;
+         case 9:
+             val = 32;
+             for( IndexType i = 0; i < rows; i++ )
+                row.setElement( i, i, val++ );
+             break;
+       }
+    };
+    TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f );
+
+    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  2 );
+    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 4 ),  3 );
+    EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 6 ),  4 );
+    EXPECT_EQ( m.getElement( 0, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ),  5 );
+    EXPECT_EQ( m.getElement( 1, 1 ),  6 );
+    EXPECT_EQ( m.getElement( 1, 2 ),  7 );
+    EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ),  8 );
+    EXPECT_EQ( m.getElement( 2, 1 ),  9 );
+    EXPECT_EQ( m.getElement( 2, 2 ), 10 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 11 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 12 );
+    EXPECT_EQ( m.getElement( 2, 5 ), 13 );
+    EXPECT_EQ( m.getElement( 2, 6 ), 14 );
+    EXPECT_EQ( m.getElement( 2, 7 ), 15 );
+    EXPECT_EQ( m.getElement( 2, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 2, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+    EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+    EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 18 );
+    EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ), 19 );
+    EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 6, 0 ), 20 );
+    EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 7, 0 ), 21 );
+    EXPECT_EQ( m.getElement( 7, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 8, 0 ), 22 );
+    EXPECT_EQ( m.getElement( 8, 1 ), 23 );
+    EXPECT_EQ( m.getElement( 8, 2 ), 24 );
+    EXPECT_EQ( m.getElement( 8, 3 ), 25 );
+    EXPECT_EQ( m.getElement( 8, 4 ), 26 );
+    EXPECT_EQ( m.getElement( 8, 5 ), 27 );
+    EXPECT_EQ( m.getElement( 8, 6 ), 28 );
+    EXPECT_EQ( m.getElement( 8, 7 ), 29 );
+    EXPECT_EQ( m.getElement( 8, 8 ), 30 );
+    EXPECT_EQ( m.getElement( 8, 9 ), 31 );
+
+    EXPECT_EQ( m.getElement( 9, 0 ), 32 );
+    EXPECT_EQ( m.getElement( 9, 1 ), 33 );
+    EXPECT_EQ( m.getElement( 9, 2 ), 34 );
+    EXPECT_EQ( m.getElement( 9, 3 ), 35 );
+    EXPECT_EQ( m.getElement( 9, 4 ), 36 );
+    EXPECT_EQ( m.getElement( 9, 5 ), 37 );
+    EXPECT_EQ( m.getElement( 9, 6 ), 38 );
+    EXPECT_EQ( m.getElement( 9, 7 ), 39 );
+    EXPECT_EQ( m.getElement( 9, 8 ), 40 );
+    EXPECT_EQ( m.getElement( 9, 9 ), 41 );
+}
+
+
 template< typename Matrix >
 void test_SetElement()
 {
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
index 353dcdbb0..e86e34f0a 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
@@ -94,6 +94,14 @@ TYPED_TEST( CSRMatrixTest, resetTest )
     test_Reset< CSRMatrixType >();
 }
 
+TYPED_TEST( CSRMatrixTest, getRowTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_GetRow< CSRMatrixType >();
+}
+
+
 TYPED_TEST( CSRMatrixTest, setElementTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
index b7dc33834..f597e3199 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
@@ -105,6 +105,13 @@ TYPED_TEST( EllpackMatrixTest, resetTest )
     test_Reset< EllpackMatrixType >();
 }
 
+TYPED_TEST( EllpackMatrixTest, getRowTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_GetRow< EllpackMatrixType >();
+}
+
 TYPED_TEST( EllpackMatrixTest, setElementTest )
 {
     using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
index b2404fe68..172ed722a 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
@@ -106,6 +106,13 @@ TYPED_TEST( SlicedEllpackMatrixTest, resetTest )
     test_Reset< SlicedEllpackMatrixType >();
 }
 
+TYPED_TEST( SlicedEllpackMatrixTest, getRowTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_GetRow< SlicedEllpackMatrixType >();
+}
+
 TYPED_TEST( SlicedEllpackMatrixTest, setElementTest )
 {
     using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-- 
GitLab


From 30a8311f9609c586166c50ae852930ceff6c1944 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sun, 29 Dec 2019 22:49:02 +0100
Subject: [PATCH 045/179] Changing SpMV benchmark for testing new sparse matrix
 implementation.

---
 src/Benchmarks/SpMV/spmv.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 8a1b0614e..8a222c7b5 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -287,15 +287,20 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
                         bool verboseMR )
 {
    benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrix_CSR >( benchmark, inputFileName, verboseMR );
+   
    benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR );
-   //benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrix_Ellpack >( benchmark, inputFileName, verboseMR );
+   
+   benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, inputFileName, verboseMR );
    //benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR );
 
    ////
    // Segments based sparse matrices
-   benchmarkSpMV< Real, SparseMatrix_CSR >( benchmark, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrix_Ellpack >( benchmark, inputFileName, verboseMR );
-   //benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, inputFileName, verboseMR );
+   
+   
+   //
 
    // AdEllpack is broken
    // benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR );
-- 
GitLab


From e07a01684e5f10209a6660c8cfb4e9b2ebcd20db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 30 Dec 2019 15:20:48 +0100
Subject: [PATCH 046/179] Added boolean compute to stop segment reduction.

---
 src/TNL/Containers/Segments/CSR.hpp           |  7 +++---
 src/TNL/Containers/Segments/CSRView.hpp       |  7 +++---
 src/TNL/Containers/Segments/Ellpack.hpp       | 13 ++++++-----
 src/TNL/Containers/Segments/EllpackView.hpp   | 13 ++++++-----
 src/TNL/Containers/Segments/SlicedEllpack.hpp | 14 +++++++-----
 .../Containers/Segments/SlicedEllpackView.hpp | 22 +++++++++++--------
 src/TNL/Matrices/SparseMatrix.hpp             |  7 +++---
 src/TNL/Matrices/SparseMatrixView.hpp         |  5 +++--
 .../Containers/Segments/SegmentsTest.hpp      |  2 +-
 9 files changed, 51 insertions(+), 39 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index 9ab2186c3..83da548fc 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -218,14 +218,15 @@ void
 CSR< Device, Index, IndexAllocator >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType() ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    const auto offsetsView = this->offsets.getConstView();
    auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
       const IndexType begin = offsetsView[ i ];
       const IndexType end = offsetsView[ i + 1 ];
       RealType aux( zero );
-      for( IndexType j = begin; j < end; j++  )
-         reduction( aux, fetch( i, j, args... ) );
+      bool compute( true );
+      for( IndexType j = begin; j < end && compute; j++  )
+         reduction( aux, fetch( i, j, compute, args... ) );
       keeper( i, aux );
    };
    Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
index f4f59370d..b4304ee32 100644
--- a/src/TNL/Containers/Segments/CSRView.hpp
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -204,14 +204,15 @@ void
 CSRView< Device, Index >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType() ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    const auto offsetsView = this->offsets.getConstView();
    auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
       const IndexType begin = offsetsView[ i ];
       const IndexType end = offsetsView[ i + 1 ];
       RealType aux( zero );
-      for( IndexType j = begin; j < end; j++  )
-         reduction( aux, fetch( i, j, args... ) );
+      bool compute( true );
+      for( IndexType j = begin; j < end && compute; j++  )
+         reduction( aux, fetch( i, j, compute, args... ) );
       keeper( i, aux );
    };
    Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index 9f7702a6f..ebc2b360e 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -306,31 +306,32 @@ void
 Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
+   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    if( RowMajorOrder )
    {
-      using RealType = decltype( fetch( IndexType(), IndexType() ) );
       const IndexType segmentSize = this->segmentSize;
       auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
          const IndexType begin = i * segmentSize;
          const IndexType end = begin + segmentSize;
          RealType aux( zero );
-         for( IndexType j = begin; j < end; j++  )
-            reduction( aux, fetch( i, j, args... ) );
+         bool compute( true );
+         for( IndexType j = begin; j < end && compute; j++  )
+            reduction( aux, fetch( i, j, compute, args... ) );
          keeper( i, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
    }
    else
    {
-      using RealType = decltype( fetch( IndexType(), IndexType() ) );
       const IndexType storageSize = this->getStorageSize();
       const IndexType alignedSize = this->alignedSize;
       auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
          const IndexType begin = i;
          const IndexType end = storageSize;
          RealType aux( zero );
-         for( IndexType j = begin; j < end; j += alignedSize  )
-            reduction( aux, fetch( i, j, args... ) );
+         bool compute( true );
+         for( IndexType j = begin; j < end && compute; j += alignedSize  )
+            reduction( aux, fetch( i, j, compute, args... ) );
          keeper( i, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp
index f5dba4f3d..dc6bd485d 100644
--- a/src/TNL/Containers/Segments/EllpackView.hpp
+++ b/src/TNL/Containers/Segments/EllpackView.hpp
@@ -245,31 +245,32 @@ void
 EllpackView< Device, Index, RowMajorOrder, Alignment >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
+   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    if( RowMajorOrder )
    {
-      using RealType = decltype( fetch( IndexType(), IndexType() ) );
       const IndexType segmentSize = this->segmentSize;
       auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
          const IndexType begin = i * segmentSize;
          const IndexType end = begin + segmentSize;
          RealType aux( zero );
-         for( IndexType j = begin; j < end; j++  )
-            reduction( aux, fetch( i, j, args... ) );
+         bool compute( true );
+         for( IndexType j = begin; j < end && compute; j++  )
+            reduction( aux, fetch( i, j, compute, args... ) );
          keeper( i, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
    }
    else
    {
-      using RealType = decltype( fetch( IndexType(), IndexType() ) );
       const IndexType storageSize = this->getStorageSize();
       const IndexType alignedSize = this->alignedSize;
       auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
          const IndexType begin = i;
          const IndexType end = storageSize;
          RealType aux( zero );
-         for( IndexType j = begin; j < end; j += alignedSize  )
-            reduction( aux, fetch( i, j, args... ) );
+         bool compute( true );
+         for( IndexType j = begin; j < end && compute; j += alignedSize  )
+            reduction( aux, fetch( i, j, compute, args... ) );
          keeper( i, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index e2aec924d..ecd32abb2 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -127,7 +127,7 @@ setSegmentsSizes( const SizesHolder& sizes )
    const auto sizes_view = sizes.getConstView();
    auto slices_view = this->sliceOffsets.getView();
    auto slice_segment_size_view = this->sliceSegmentSizes.getView();
-   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx ) -> IndexType {
+   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx, bool& compute ) -> IndexType {
       if( globalIdx < _size )
          return sizes_view[ globalIdx ];
       return 0;
@@ -341,7 +341,7 @@ void
 SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType() ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
    const auto sliceOffsets_view = this->sliceOffsets.getConstView();
    if( RowMajorOrder )
@@ -353,8 +353,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
          const IndexType end = begin + segmentSize;
          RealType aux( zero );
-         for( IndexType globalIdx = begin; globalIdx< end; globalIdx++  )
-            reduction( aux, fetch( segmentIdx, globalIdx, args... ) );
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx< end && compute; globalIdx++  )
+            reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
          keeper( segmentIdx, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -368,8 +369,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
          const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
          RealType aux( zero );
-         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize  )
-            reduction( aux, fetch( segmentIdx, globalIdx, args... ) );
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize  )
+            reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
          keeper( segmentIdx, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
index 139a09a15..41b49ed15 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
@@ -247,8 +247,9 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
          const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
          const IndexType end = begin + segmentSize;
          IndexType localIdx( 0 );
-         for( IndexType globalIdx = begin; globalIdx < end; globalIdx++  )
-            if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++  )
+            if( ! f( segmentIdx, localIdx++, globalIdx, compute, args... ) )
                break;
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -262,8 +263,9 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
          const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
          const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
          IndexType localIdx( 0 );
-         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize )
-            if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize )
+            if( ! f( segmentIdx, localIdx++, globalIdx, compute, args... ) )
                break;
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -291,7 +293,7 @@ void
 SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType() ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
    const auto sliceOffsets_view = this->sliceOffsets.getConstView();
    if( RowMajorOrder )
@@ -303,8 +305,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
          const IndexType end = begin + segmentSize;
          RealType aux( zero );
-         for( IndexType globalIdx = begin; globalIdx< end; globalIdx++  )
-            reduction( aux, fetch( segmentIdx, globalIdx, args... ) );
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx< end && compute; globalIdx++  )
+            reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
          keeper( segmentIdx, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -318,8 +321,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
          const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
          RealType aux( zero );
-         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize  )
-            reduction( aux, fetch( segmentIdx, globalIdx, args... ) );
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize  )
+            reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
          keeper( segmentIdx, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index c0dd3b9a3..691157a9c 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -628,9 +628,10 @@ vectorProduct( const InVector& inVector,
    const auto valuesView = this->values.getConstView();
    const auto columnIndexesView = this->columnIndexes.getConstView();
    const IndexType paddingIndex = this->getPaddingIndex();
-   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset ) -> RealType {
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset, bool& compute ) -> RealType {
       const IndexType column = columnIndexesView[ offset ];
-      if( column == paddingIndex )
+      compute = ( column != paddingIndex );
+      if( ! compute )
          return 0.0;
       return valuesView[ offset ] * inVectorView[ column ];
    };
@@ -658,7 +659,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
    const auto columns_view = this->columnIndexes.getConstView();
    const auto values_view = this->values.getConstView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
-   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType globalIdx ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
+   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
       IndexType columnIdx = columns_view[ globalIdx ];
       if( columnIdx != paddingIndex_ )
          return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index 5ac494a9b..ce0e7aa18 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -508,9 +508,10 @@ vectorProduct( const InVector& inVector,
    const auto valuesView = this->values.getConstView();
    const auto columnIndexesView = this->columnIndexes.getConstView();
    const IndexType paddingIndex = this->getPaddingIndex();
-   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset ) -> RealType {
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset, bool& compute ) -> RealType {
       const IndexType column = columnIndexesView[ offset ];
-      if( column == paddingIndex )
+      compute = ( column != paddingIndex );
+      if( ! compute )
          return 0.0;
       return valuesView[ offset ] * inVectorView[ column ];
    };
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
index 5e74f96b0..6189c2e9a 100644
--- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp
+++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
@@ -143,7 +143,7 @@ void test_AllReduction_MaximumInSegments()
 
    const auto v_view = v.getConstView();
    auto result_view = result.getView();
-   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx ) -> IndexType {
+   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx, bool& compute ) -> IndexType {
       return v_view[ globalIdx ];
    };
    auto reduce = [] __cuda_callable__ ( IndexType& a, const IndexType b ) {
-- 
GitLab


From 9b45cc2e843ec3062224bb5a4873ea97ce3faf07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 30 Dec 2019 17:10:43 +0100
Subject: [PATCH 047/179] Ignoring flag compute in SlicedEllpack -> it makes
 SlicedEllpack faster.

---
 src/TNL/Containers/Segments/SlicedEllpack.hpp     | 4 ++--
 src/TNL/Containers/Segments/SlicedEllpackView.hpp | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index ecd32abb2..b58b6a954 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -354,7 +354,7 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType end = begin + segmentSize;
          RealType aux( zero );
          bool compute( true );
-         for( IndexType globalIdx = begin; globalIdx< end && compute; globalIdx++  )
+         for( IndexType globalIdx = begin; globalIdx< end; globalIdx++  )
             reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
          keeper( segmentIdx, aux );
       };
@@ -370,7 +370,7 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
          RealType aux( zero );
          bool compute( true );
-         for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize  )
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize  )
             reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
          keeper( segmentIdx, aux );
       };
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
index 41b49ed15..82570664f 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
@@ -306,7 +306,7 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType end = begin + segmentSize;
          RealType aux( zero );
          bool compute( true );
-         for( IndexType globalIdx = begin; globalIdx< end && compute; globalIdx++  )
+         for( IndexType globalIdx = begin; globalIdx< end; globalIdx++  )
             reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
          keeper( segmentIdx, aux );
       };
@@ -322,7 +322,7 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
          RealType aux( zero );
          bool compute( true );
-         for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize  )
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize  )
             reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
          keeper( segmentIdx, aux );
       };
-- 
GitLab


From 6bb460d87b0a34672d3602828b790a13369547e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 30 Dec 2019 18:28:24 +0100
Subject: [PATCH 048/179] Original implementation of the sparse matrices was
 moved to Matrices/Legacy.

---
 src/Benchmarks/BLAS/spmv.h                           |  8 ++++----
 .../DistSpMV/tnl-benchmark-distributed-spmv.h        |  2 +-
 .../LinearSolvers/tnl-benchmark-linear-solvers.h     |  2 +-
 .../ODESolvers/tnl-benchmark-ode-solvers.h           |  2 +-
 src/Benchmarks/SpMV/spmv.h                           | 12 ++++++------
 src/Python/pytnl/tnl/SparseMatrix.cpp                |  6 +++---
 src/Python/pytnl/tnl/SparseMatrix.h                  |  2 +-
 src/TNL/Matrices/DistributedMatrix.h                 |  2 +-
 src/TNL/Matrices/{ => Legacy}/AdEllpack.h            |  4 ++--
 src/TNL/Matrices/{ => Legacy}/AdEllpack_impl.h       |  2 +-
 src/TNL/Matrices/{ => Legacy}/BiEllpack.h            |  4 ++--
 src/TNL/Matrices/{ => Legacy}/BiEllpackSymmetric.h   |  0
 .../Matrices/{ => Legacy}/BiEllpackSymmetric_impl.h  |  0
 src/TNL/Matrices/{ => Legacy}/BiEllpack_impl.h       |  2 +-
 src/TNL/Matrices/{ => Legacy}/CSR.h                  |  4 ++--
 src/TNL/Matrices/{ => Legacy}/CSR_impl.h             |  2 +-
 src/TNL/Matrices/{ => Legacy}/ChunkedEllpack.h       |  4 ++--
 src/TNL/Matrices/{ => Legacy}/ChunkedEllpack_impl.h  |  2 +-
 src/TNL/Matrices/{ => Legacy}/Ellpack.h              |  4 ++--
 src/TNL/Matrices/{ => Legacy}/EllpackSymmetric.h     |  0
 .../Matrices/{ => Legacy}/EllpackSymmetricGraph.h    |  0
 .../{ => Legacy}/EllpackSymmetricGraph_impl.h        |  0
 .../Matrices/{ => Legacy}/EllpackSymmetric_impl.h    |  0
 src/TNL/Matrices/{ => Legacy}/Ellpack_impl.h         |  2 +-
 src/TNL/Matrices/{ => Legacy}/SlicedEllpack.h        |  4 ++--
 .../Matrices/{ => Legacy}/SlicedEllpackSymmetric.h   |  0
 .../{ => Legacy}/SlicedEllpackSymmetricGraph.h       |  0
 .../{ => Legacy}/SlicedEllpackSymmetricGraph_impl.h  |  0
 .../{ => Legacy}/SlicedEllpackSymmetric_impl.h       |  0
 src/TNL/Matrices/{ => Legacy}/SlicedEllpack_impl.h   |  2 +-
 src/TNL/Matrices/{ => Legacy}/Sparse.h               |  4 ++--
 src/TNL/Matrices/{ => Legacy}/SparseRow.h            |  2 +-
 src/TNL/Matrices/{ => Legacy}/SparseRow_impl.h       |  2 +-
 src/TNL/Matrices/{ => Legacy}/Sparse_impl.h          |  0
 src/TNL/Problems/HeatEquationProblem.h               |  2 +-
 src/TNL/Problems/PDEProblem.h                        |  2 +-
 src/TNL/Solvers/Linear/Preconditioners/ILU0.h        |  2 +-
 src/TNL/Solvers/Linear/Preconditioners/ILUT.h        |  2 +-
 src/TNL/Solvers/SolverConfig_impl.h                  |  3 +--
 src/UnitTests/Matrices/DistributedMatrixTest.h       |  2 +-
 src/UnitTests/Matrices/SparseMatrixCopyTest.h        |  6 +++---
 src/UnitTests/Matrices/SparseMatrixTest.h            |  2 +-
 src/UnitTests/Matrices/SparseMatrixTest.hpp          |  6 +++---
 src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h  |  2 +-
 src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h  |  2 +-
 src/UnitTests/Matrices/SparseMatrixTest_CSR.h        |  2 +-
 .../Matrices/SparseMatrixTest_ChunkedEllpack.h       |  2 +-
 src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h    |  2 +-
 48 files changed, 58 insertions(+), 59 deletions(-)
 rename src/TNL/Matrices/{ => Legacy}/AdEllpack.h (99%)
 rename src/TNL/Matrices/{ => Legacy}/AdEllpack_impl.h (99%)
 rename src/TNL/Matrices/{ => Legacy}/BiEllpack.h (98%)
 rename src/TNL/Matrices/{ => Legacy}/BiEllpackSymmetric.h (100%)
 rename src/TNL/Matrices/{ => Legacy}/BiEllpackSymmetric_impl.h (100%)
 rename src/TNL/Matrices/{ => Legacy}/BiEllpack_impl.h (99%)
 rename src/TNL/Matrices/{ => Legacy}/CSR.h (99%)
 rename src/TNL/Matrices/{ => Legacy}/CSR_impl.h (99%)
 rename src/TNL/Matrices/{ => Legacy}/ChunkedEllpack.h (99%)
 rename src/TNL/Matrices/{ => Legacy}/ChunkedEllpack_impl.h (99%)
 rename src/TNL/Matrices/{ => Legacy}/Ellpack.h (98%)
 rename src/TNL/Matrices/{ => Legacy}/EllpackSymmetric.h (100%)
 rename src/TNL/Matrices/{ => Legacy}/EllpackSymmetricGraph.h (100%)
 rename src/TNL/Matrices/{ => Legacy}/EllpackSymmetricGraph_impl.h (100%)
 rename src/TNL/Matrices/{ => Legacy}/EllpackSymmetric_impl.h (100%)
 rename src/TNL/Matrices/{ => Legacy}/Ellpack_impl.h (99%)
 rename src/TNL/Matrices/{ => Legacy}/SlicedEllpack.h (98%)
 rename src/TNL/Matrices/{ => Legacy}/SlicedEllpackSymmetric.h (100%)
 rename src/TNL/Matrices/{ => Legacy}/SlicedEllpackSymmetricGraph.h (100%)
 rename src/TNL/Matrices/{ => Legacy}/SlicedEllpackSymmetricGraph_impl.h (100%)
 rename src/TNL/Matrices/{ => Legacy}/SlicedEllpackSymmetric_impl.h (100%)
 rename src/TNL/Matrices/{ => Legacy}/SlicedEllpack_impl.h (99%)
 rename src/TNL/Matrices/{ => Legacy}/Sparse.h (95%)
 rename src/TNL/Matrices/{ => Legacy}/SparseRow.h (97%)
 rename src/TNL/Matrices/{ => Legacy}/SparseRow_impl.h (99%)
 rename src/TNL/Matrices/{ => Legacy}/Sparse_impl.h (100%)

diff --git a/src/Benchmarks/BLAS/spmv.h b/src/Benchmarks/BLAS/spmv.h
index d515d52d7..6685b9f76 100644
--- a/src/Benchmarks/BLAS/spmv.h
+++ b/src/Benchmarks/BLAS/spmv.h
@@ -15,10 +15,10 @@
 #include "../Benchmarks.h"
 
 #include <TNL/Pointers/DevicePointer.h>
-#include <TNL/Matrices/CSR.h>
-#include <TNL/Matrices/Ellpack.h>
-#include <TNL/Matrices/SlicedEllpack.h>
-#include <TNL/Matrices/ChunkedEllpack.h>
+#include <TNL/Matrices/Legacy/CSR.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
+#include <TNL/Matrices/Legacy/ChunkedEllpack.h>
 
 namespace TNL {
 namespace Benchmarks {
diff --git a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
index aa4b29424..b90b11088 100644
--- a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
+++ b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
@@ -30,7 +30,7 @@
 #include "../Benchmarks.h"
 #include "ordering.h"
 
-#include <TNL/Matrices/SlicedEllpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
 
 using namespace TNL;
 using namespace TNL::Benchmarks;
diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
index 4aabf39cd..9c58d25b0 100644
--- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
+++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
@@ -55,7 +55,7 @@
    #define HAVE_CUSOLVER
 #endif
 
-#include <TNL/Matrices/SlicedEllpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
 
 using namespace TNL;
 using namespace TNL::Benchmarks;
diff --git a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
index d29b680bc..1e4bc380e 100644
--- a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
+++ b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
@@ -36,7 +36,7 @@
 #include "Euler.h"
 #include "Merson.h"
 
-#include <TNL/Matrices/SlicedEllpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
 
 using namespace TNL;
 using namespace TNL::Benchmarks;
diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 8a222c7b5..e3a1ae047 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -17,12 +17,12 @@
 #include "../Benchmarks.h"
 
 #include <TNL/Pointers/DevicePointer.h>
-#include <TNL/Matrices/CSR.h>
-#include <TNL/Matrices/Ellpack.h>
-#include <TNL/Matrices/SlicedEllpack.h>
-#include <TNL/Matrices/ChunkedEllpack.h>
-#include <TNL/Matrices/AdEllpack.h>
-#include <TNL/Matrices/BiEllpack.h>
+#include <TNL/Matrices/Legacy/CSR.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
+#include <TNL/Matrices/Legacy/ChunkedEllpack.h>
+#include <TNL/Matrices/Legacy/AdEllpack.h>
+#include <TNL/Matrices/Legacy/BiEllpack.h>
 
 #include <TNL/Matrices/MatrixReader.h>
 
diff --git a/src/Python/pytnl/tnl/SparseMatrix.cpp b/src/Python/pytnl/tnl/SparseMatrix.cpp
index e65849983..fe3ba5aca 100644
--- a/src/Python/pytnl/tnl/SparseMatrix.cpp
+++ b/src/Python/pytnl/tnl/SparseMatrix.cpp
@@ -3,9 +3,9 @@
 
 #include "SparseMatrix.h"
 
-#include <TNL/Matrices/CSR.h>
-#include <TNL/Matrices/Ellpack.h>
-#include <TNL/Matrices/SlicedEllpack.h>
+#include <TNL/Matrices/Legacy/CSR.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
 
 using CSR_host = TNL::Matrices::CSR< double, TNL::Devices::Host, int >;
 using CSR_cuda = TNL::Matrices::CSR< double, TNL::Devices::Cuda, int >;
diff --git a/src/Python/pytnl/tnl/SparseMatrix.h b/src/Python/pytnl/tnl/SparseMatrix.h
index 1a32bd257..03ec5814c 100644
--- a/src/Python/pytnl/tnl/SparseMatrix.h
+++ b/src/Python/pytnl/tnl/SparseMatrix.h
@@ -5,7 +5,7 @@ namespace py = pybind11;
 
 #include <TNL/String.h>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Matrices/CSR.h>
+#include <TNL/Matrices/Legacy/CSR.h>
 
 template< typename Matrix >
 struct SpecificExports
diff --git a/src/TNL/Matrices/DistributedMatrix.h b/src/TNL/Matrices/DistributedMatrix.h
index 76b6ea8c1..05ee28391 100644
--- a/src/TNL/Matrices/DistributedMatrix.h
+++ b/src/TNL/Matrices/DistributedMatrix.h
@@ -14,7 +14,7 @@
 
 #include <type_traits>
 
-#include <TNL/Matrices/SparseRow.h>
+#include <TNL/Matrices/Legacy/SparseRow.h>
 #include <TNL/Communicators/MpiCommunicator.h>
 #include <TNL/Containers/Subrange.h>
 #include <TNL/Containers/DistributedVector.h>
diff --git a/src/TNL/Matrices/AdEllpack.h b/src/TNL/Matrices/Legacy/AdEllpack.h
similarity index 99%
rename from src/TNL/Matrices/AdEllpack.h
rename to src/TNL/Matrices/Legacy/AdEllpack.h
index f011e6c80..3d2db7b96 100644
--- a/src/TNL/Matrices/AdEllpack.h
+++ b/src/TNL/Matrices/Legacy/AdEllpack.h
@@ -18,7 +18,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/Sparse.h>
+#include <TNL/Matrices/Legacy/Sparse.h>
 #include <TNL/Containers/Vector.h>
 
 namespace TNL {
@@ -293,4 +293,4 @@ protected:
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/AdEllpack_impl.h>
+#include <TNL/Matrices/Legacy/AdEllpack_impl.h>
diff --git a/src/TNL/Matrices/AdEllpack_impl.h b/src/TNL/Matrices/Legacy/AdEllpack_impl.h
similarity index 99%
rename from src/TNL/Matrices/AdEllpack_impl.h
rename to src/TNL/Matrices/Legacy/AdEllpack_impl.h
index b7b97ff93..234e18f94 100644
--- a/src/TNL/Matrices/AdEllpack_impl.h
+++ b/src/TNL/Matrices/Legacy/AdEllpack_impl.h
@@ -8,7 +8,7 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Matrices/AdEllpack.h>
+#include <TNL/Matrices/Legacy/AdEllpack.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Math.h>
 #include <TNL/TypeInfo.h>
diff --git a/src/TNL/Matrices/BiEllpack.h b/src/TNL/Matrices/Legacy/BiEllpack.h
similarity index 98%
rename from src/TNL/Matrices/BiEllpack.h
rename to src/TNL/Matrices/Legacy/BiEllpack.h
index 3ec4b662f..fe3fd9e11 100644
--- a/src/TNL/Matrices/BiEllpack.h
+++ b/src/TNL/Matrices/Legacy/BiEllpack.h
@@ -18,7 +18,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/Sparse.h>
+#include <TNL/Matrices/Legacy/Sparse.h>
 #include <TNL/Containers/Vector.h>
 
 namespace TNL {
@@ -217,5 +217,5 @@ private:
    } //namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/BiEllpack_impl.h>
+#include <TNL/Matrices/Legacy/BiEllpack_impl.h>
 
diff --git a/src/TNL/Matrices/BiEllpackSymmetric.h b/src/TNL/Matrices/Legacy/BiEllpackSymmetric.h
similarity index 100%
rename from src/TNL/Matrices/BiEllpackSymmetric.h
rename to src/TNL/Matrices/Legacy/BiEllpackSymmetric.h
diff --git a/src/TNL/Matrices/BiEllpackSymmetric_impl.h b/src/TNL/Matrices/Legacy/BiEllpackSymmetric_impl.h
similarity index 100%
rename from src/TNL/Matrices/BiEllpackSymmetric_impl.h
rename to src/TNL/Matrices/Legacy/BiEllpackSymmetric_impl.h
diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/Legacy/BiEllpack_impl.h
similarity index 99%
rename from src/TNL/Matrices/BiEllpack_impl.h
rename to src/TNL/Matrices/Legacy/BiEllpack_impl.h
index c659b758e..36732a39a 100644
--- a/src/TNL/Matrices/BiEllpack_impl.h
+++ b/src/TNL/Matrices/Legacy/BiEllpack_impl.h
@@ -11,7 +11,7 @@
 #pragma once
 
 
-#include <TNL/Matrices/BiEllpack.h>
+#include <TNL/Matrices/Legacy/BiEllpack.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Math.h>
 #include <cstdio>
diff --git a/src/TNL/Matrices/CSR.h b/src/TNL/Matrices/Legacy/CSR.h
similarity index 99%
rename from src/TNL/Matrices/CSR.h
rename to src/TNL/Matrices/Legacy/CSR.h
index 485176d1d..b68434252 100644
--- a/src/TNL/Matrices/CSR.h
+++ b/src/TNL/Matrices/Legacy/CSR.h
@@ -10,7 +10,7 @@
 
 #pragma once 
 
-#include <TNL/Matrices/Sparse.h>
+#include <TNL/Matrices/Legacy/Sparse.h>
 #include <TNL/Containers/Vector.h>
 
 #include <TNL/Devices/Cuda.h>
@@ -272,4 +272,4 @@ protected:
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/CSR_impl.h>
+#include <TNL/Matrices/Legacy/CSR_impl.h>
diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/Legacy/CSR_impl.h
similarity index 99%
rename from src/TNL/Matrices/CSR_impl.h
rename to src/TNL/Matrices/Legacy/CSR_impl.h
index db31d6dcd..08b35f563 100644
--- a/src/TNL/Matrices/CSR_impl.h
+++ b/src/TNL/Matrices/Legacy/CSR_impl.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/CSR.h>
+#include <TNL/Matrices/Legacy/CSR.h>
 #include <TNL/Containers/VectorView.h>
 #include <TNL/Math.h>
 #include <TNL/Exceptions/NotImplementedError.h>
diff --git a/src/TNL/Matrices/ChunkedEllpack.h b/src/TNL/Matrices/Legacy/ChunkedEllpack.h
similarity index 99%
rename from src/TNL/Matrices/ChunkedEllpack.h
rename to src/TNL/Matrices/Legacy/ChunkedEllpack.h
index 9d4220796..a0f55b326 100644
--- a/src/TNL/Matrices/ChunkedEllpack.h
+++ b/src/TNL/Matrices/Legacy/ChunkedEllpack.h
@@ -22,7 +22,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/Sparse.h>
+#include <TNL/Matrices/Legacy/Sparse.h>
 #include <TNL/Containers/Vector.h>
 
 namespace TNL {
@@ -352,5 +352,5 @@ protected:
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/ChunkedEllpack_impl.h>
+#include <TNL/Matrices/Legacy/ChunkedEllpack_impl.h>
 
diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/Legacy/ChunkedEllpack_impl.h
similarity index 99%
rename from src/TNL/Matrices/ChunkedEllpack_impl.h
rename to src/TNL/Matrices/Legacy/ChunkedEllpack_impl.h
index 3b1fd9c8f..406159752 100644
--- a/src/TNL/Matrices/ChunkedEllpack_impl.h
+++ b/src/TNL/Matrices/Legacy/ChunkedEllpack_impl.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/ChunkedEllpack.h>
+#include <TNL/Matrices/Legacy/ChunkedEllpack.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Math.h>
 #include <TNL/Exceptions/NotImplementedError.h>
diff --git a/src/TNL/Matrices/Ellpack.h b/src/TNL/Matrices/Legacy/Ellpack.h
similarity index 98%
rename from src/TNL/Matrices/Ellpack.h
rename to src/TNL/Matrices/Legacy/Ellpack.h
index 6536f5f6c..5f6e666f9 100644
--- a/src/TNL/Matrices/Ellpack.h
+++ b/src/TNL/Matrices/Legacy/Ellpack.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/Sparse.h>
+#include <TNL/Matrices/Legacy/Sparse.h>
 #include <TNL/Containers/Vector.h>
 
 namespace TNL {
@@ -207,4 +207,4 @@ protected:
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/Ellpack_impl.h>
+#include <TNL/Matrices/Legacy/Ellpack_impl.h>
diff --git a/src/TNL/Matrices/EllpackSymmetric.h b/src/TNL/Matrices/Legacy/EllpackSymmetric.h
similarity index 100%
rename from src/TNL/Matrices/EllpackSymmetric.h
rename to src/TNL/Matrices/Legacy/EllpackSymmetric.h
diff --git a/src/TNL/Matrices/EllpackSymmetricGraph.h b/src/TNL/Matrices/Legacy/EllpackSymmetricGraph.h
similarity index 100%
rename from src/TNL/Matrices/EllpackSymmetricGraph.h
rename to src/TNL/Matrices/Legacy/EllpackSymmetricGraph.h
diff --git a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h b/src/TNL/Matrices/Legacy/EllpackSymmetricGraph_impl.h
similarity index 100%
rename from src/TNL/Matrices/EllpackSymmetricGraph_impl.h
rename to src/TNL/Matrices/Legacy/EllpackSymmetricGraph_impl.h
diff --git a/src/TNL/Matrices/EllpackSymmetric_impl.h b/src/TNL/Matrices/Legacy/EllpackSymmetric_impl.h
similarity index 100%
rename from src/TNL/Matrices/EllpackSymmetric_impl.h
rename to src/TNL/Matrices/Legacy/EllpackSymmetric_impl.h
diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Legacy/Ellpack_impl.h
similarity index 99%
rename from src/TNL/Matrices/Ellpack_impl.h
rename to src/TNL/Matrices/Legacy/Ellpack_impl.h
index 5ae12f408..656c3f7c2 100644
--- a/src/TNL/Matrices/Ellpack_impl.h
+++ b/src/TNL/Matrices/Legacy/Ellpack_impl.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/Ellpack.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Math.h>
 #include <TNL/Exceptions/NotImplementedError.h>
diff --git a/src/TNL/Matrices/SlicedEllpack.h b/src/TNL/Matrices/Legacy/SlicedEllpack.h
similarity index 98%
rename from src/TNL/Matrices/SlicedEllpack.h
rename to src/TNL/Matrices/Legacy/SlicedEllpack.h
index 7176019d2..b79913b23 100644
--- a/src/TNL/Matrices/SlicedEllpack.h
+++ b/src/TNL/Matrices/Legacy/SlicedEllpack.h
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/Sparse.h>
+#include <TNL/Matrices/Legacy/Sparse.h>
 #include <TNL/Containers/Vector.h>
 
 namespace TNL {
@@ -235,4 +235,4 @@ public:
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/SlicedEllpack_impl.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack_impl.h>
diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric.h b/src/TNL/Matrices/Legacy/SlicedEllpackSymmetric.h
similarity index 100%
rename from src/TNL/Matrices/SlicedEllpackSymmetric.h
rename to src/TNL/Matrices/Legacy/SlicedEllpackSymmetric.h
diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h b/src/TNL/Matrices/Legacy/SlicedEllpackSymmetricGraph.h
similarity index 100%
rename from src/TNL/Matrices/SlicedEllpackSymmetricGraph.h
rename to src/TNL/Matrices/Legacy/SlicedEllpackSymmetricGraph.h
diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h b/src/TNL/Matrices/Legacy/SlicedEllpackSymmetricGraph_impl.h
similarity index 100%
rename from src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h
rename to src/TNL/Matrices/Legacy/SlicedEllpackSymmetricGraph_impl.h
diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h b/src/TNL/Matrices/Legacy/SlicedEllpackSymmetric_impl.h
similarity index 100%
rename from src/TNL/Matrices/SlicedEllpackSymmetric_impl.h
rename to src/TNL/Matrices/Legacy/SlicedEllpackSymmetric_impl.h
diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/Legacy/SlicedEllpack_impl.h
similarity index 99%
rename from src/TNL/Matrices/SlicedEllpack_impl.h
rename to src/TNL/Matrices/Legacy/SlicedEllpack_impl.h
index 8c629b563..bfba092ff 100644
--- a/src/TNL/Matrices/SlicedEllpack_impl.h
+++ b/src/TNL/Matrices/Legacy/SlicedEllpack_impl.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/SlicedEllpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Math.h>
 #include <TNL/Exceptions/NotImplementedError.h>
diff --git a/src/TNL/Matrices/Sparse.h b/src/TNL/Matrices/Legacy/Sparse.h
similarity index 95%
rename from src/TNL/Matrices/Sparse.h
rename to src/TNL/Matrices/Legacy/Sparse.h
index c19002443..12c76a6a0 100644
--- a/src/TNL/Matrices/Sparse.h
+++ b/src/TNL/Matrices/Legacy/Sparse.h
@@ -11,7 +11,7 @@
 #pragma once
 
 #include <TNL/Matrices/Matrix.h>
-#include <TNL/Matrices/SparseRow.h>
+#include <TNL/Matrices/Legacy/SparseRow.h>
 
 namespace TNL {
 namespace Matrices {
@@ -64,5 +64,5 @@ class Sparse : public Matrix< Real, Device, Index >
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/Sparse_impl.h>
+#include <TNL/Matrices/Legacy/Sparse_impl.h>
 #include <TNL/Matrices/SparseOperations.h>
diff --git a/src/TNL/Matrices/SparseRow.h b/src/TNL/Matrices/Legacy/SparseRow.h
similarity index 97%
rename from src/TNL/Matrices/SparseRow.h
rename to src/TNL/Matrices/Legacy/SparseRow.h
index f66cd2cea..4787e638a 100644
--- a/src/TNL/Matrices/SparseRow.h
+++ b/src/TNL/Matrices/Legacy/SparseRow.h
@@ -80,4 +80,4 @@ std::ostream& operator<<( std::ostream& str, const SparseRow< Real, Index >& row
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/SparseRow_impl.h>
+#include <TNL/Matrices/Legacy/SparseRow_impl.h>
diff --git a/src/TNL/Matrices/SparseRow_impl.h b/src/TNL/Matrices/Legacy/SparseRow_impl.h
similarity index 99%
rename from src/TNL/Matrices/SparseRow_impl.h
rename to src/TNL/Matrices/Legacy/SparseRow_impl.h
index 60dfd5034..84f8e210e 100644
--- a/src/TNL/Matrices/SparseRow_impl.h
+++ b/src/TNL/Matrices/Legacy/SparseRow_impl.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/SparseRow.h>
+#include <TNL/Matrices/Legacy/SparseRow.h>
 #include <TNL/Exceptions/NotImplementedError.h>
 
 // Following includes are here to enable usage of std::vector and std::cout. To avoid having to include Device type (HOW would this be done anyway)
diff --git a/src/TNL/Matrices/Sparse_impl.h b/src/TNL/Matrices/Legacy/Sparse_impl.h
similarity index 100%
rename from src/TNL/Matrices/Sparse_impl.h
rename to src/TNL/Matrices/Legacy/Sparse_impl.h
diff --git a/src/TNL/Problems/HeatEquationProblem.h b/src/TNL/Problems/HeatEquationProblem.h
index 26df28965..6a8974227 100644
--- a/src/TNL/Problems/HeatEquationProblem.h
+++ b/src/TNL/Problems/HeatEquationProblem.h
@@ -18,7 +18,7 @@
 
 #include <TNL/Problems/PDEProblem.h>
 #include <TNL/Operators/diffusion/LinearDiffusion.h>
-#include <TNL/Matrices/Ellpack.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Timer.h>
 #include <TNL/Solvers/PDE/ExplicitUpdater.h>
diff --git a/src/TNL/Problems/PDEProblem.h b/src/TNL/Problems/PDEProblem.h
index 69d95aaee..c81ffdd39 100644
--- a/src/TNL/Problems/PDEProblem.h
+++ b/src/TNL/Problems/PDEProblem.h
@@ -13,7 +13,7 @@
 #include <TNL/Problems/Problem.h>
 #include <TNL/Problems/CommonData.h>
 #include <TNL/Pointers/SharedPointer.h>
-#include <TNL/Matrices/SlicedEllpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
 #include <TNL/Solvers/PDE/TimeDependentPDESolver.h>
 
 namespace TNL {
diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h
index d5127fab5..1f2b9f198 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h
@@ -15,7 +15,7 @@
 #include "Preconditioner.h"
 
 #include <TNL/Containers/Vector.h>
-#include <TNL/Matrices/CSR.h>
+#include <TNL/Matrices/Legacy/CSR.h>
 #include <TNL/Pointers/UniquePointer.h>
 #include <TNL/Exceptions/NotImplementedError.h>
 
diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h
index cce3dc5c4..6a4a4a83b 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h
@@ -15,7 +15,7 @@
 #include "Preconditioner.h"
 
 #include <TNL/Containers/Vector.h>
-#include <TNL/Matrices/CSR.h>
+#include <TNL/Matrices/Legacy/CSR.h>
 
 namespace TNL {
 namespace Solvers {
diff --git a/src/TNL/Solvers/SolverConfig_impl.h b/src/TNL/Solvers/SolverConfig_impl.h
index 9d3515157..70e7737ee 100644
--- a/src/TNL/Solvers/SolverConfig_impl.h
+++ b/src/TNL/Solvers/SolverConfig_impl.h
@@ -16,8 +16,7 @@
 #include <TNL/Solvers/PDE/ExplicitTimeStepper.h>
 #include <TNL/Solvers/PDE/TimeDependentPDESolver.h>
 #include <TNL/Solvers/LinearSolverTypeResolver.h>
-#include <TNL/Matrices/CSR.h>
-#include <TNL/Meshes/DistributedMeshes/DistributedGrid.h>
+#include <TNL/Matrices/Legacy/CSR.h>
 
 namespace TNL {
 namespace Solvers {
diff --git a/src/UnitTests/Matrices/DistributedMatrixTest.h b/src/UnitTests/Matrices/DistributedMatrixTest.h
index 93673a290..a1a9f3eb8 100644
--- a/src/UnitTests/Matrices/DistributedMatrixTest.h
+++ b/src/UnitTests/Matrices/DistributedMatrixTest.h
@@ -13,7 +13,7 @@
 #include <TNL/Communicators/NoDistrCommunicator.h>
 #include <TNL/Matrices/DistributedMatrix.h>
 #include <TNL/Containers/Partitioner.h>
-#include <TNL/Matrices/CSR.h>
+#include <TNL/Matrices/Legacy/CSR.h>
 
 using namespace TNL;
 
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index d100bb939..f00daf1f3 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -8,9 +8,9 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Matrices/CSR.h>
-#include <TNL/Matrices/Ellpack.h>
-#include <TNL/Matrices/SlicedEllpack.h>
+#include <TNL/Matrices/Legacy/CSR.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
 
 #include <TNL/Matrices/SparseMatrix.h>
 #include <TNL/Matrices/MatrixType.h>
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h
index 5baeb4279..8b1d57566 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest.h
@@ -8,7 +8,7 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Matrices/CSR.h>
+#include <TNL/Matrices/Legacy/CSR.h>
 
 #include "SparseMatrixTest.hpp"
 #include <iostream>
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp
index 72dfc90e8..c6ff5cbd7 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp
@@ -15,9 +15,9 @@
 #include <iostream>
 
 // Temporary, until test_OperatorEquals doesn't work for all formats.
-#include <TNL/Matrices/ChunkedEllpack.h>
-#include <TNL/Matrices/AdEllpack.h>
-#include <TNL/Matrices/BiEllpack.h>
+#include <TNL/Matrices/Legacy/ChunkedEllpack.h>
+#include <TNL/Matrices/Legacy/AdEllpack.h>
+#include <TNL/Matrices/Legacy/BiEllpack.h>
 
 #ifdef HAVE_GTEST
 #include <gtest/gtest.h>
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h
index 7effb52cd..2169b96df 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h
@@ -8,7 +8,7 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Matrices/AdEllpack.h>
+#include <TNL/Matrices/Legacy/AdEllpack.h>
 
 #include "SparseMatrixTest.hpp"
 #include <iostream>
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h
index 33e530be5..c74fa635f 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h
@@ -8,7 +8,7 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Matrices/BiEllpack.h>
+#include <TNL/Matrices/Legacy/BiEllpack.h>
 
 #include "SparseMatrixTest.hpp"
 #include <iostream>
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
index 3530db46c..c9dfc770f 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
@@ -8,7 +8,7 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Matrices/CSR.h>
+#include <TNL/Matrices/Legacy/CSR.h>
 
 #include "SparseMatrixTest.hpp"
 #include <iostream>
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h
index 6909b53a5..45801fa3a 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h
@@ -8,7 +8,7 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Matrices/ChunkedEllpack.h>
+#include <TNL/Matrices/Legacy/ChunkedEllpack.h>
 
 #include "SparseMatrixTest.hpp"
 #include <iostream>
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
index 979068e02..26d270a3d 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
@@ -8,7 +8,7 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Matrices/Ellpack.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
 
 #include "SparseMatrixTest.hpp"
 #include <iostream>
-- 
GitLab


From 2e95ddda03edf3135757df4217ac338a99e9d92f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 2 Jan 2020 14:40:01 +0100
Subject: [PATCH 049/179] Isolating legacy sparse matrix formats unit tests.

---
 src/TNL/Containers/Segments/SlicedEllpack.h   |    1 +
 src/UnitTests/Matrices/CMakeLists.txt         |   49 +-
 src/UnitTests/Matrices/Legacy/CMakeLists.txt  |   72 +
 .../Matrices/Legacy/SparseMatrixCopyTest.cpp  |   11 +
 .../Matrices/Legacy/SparseMatrixCopyTest.cu   |   11 +
 .../Matrices/Legacy/SparseMatrixCopyTest.h    |  573 ++++++
 .../Matrices/Legacy/SparseMatrixTest.cpp      |   11 +
 .../Matrices/Legacy/SparseMatrixTest.cu       |   11 +
 .../Matrices/Legacy/SparseMatrixTest.h        |   39 +
 .../Matrices/Legacy/SparseMatrixTest.hpp      | 1831 +++++++++++++++++
 .../SparseMatrixTest_AdEllpack.cpp            |    0
 .../SparseMatrixTest_AdEllpack.cu             |    0
 .../{ => Legacy}/SparseMatrixTest_AdEllpack.h |    2 +-
 .../SparseMatrixTest_BiEllpack.cpp            |    0
 .../SparseMatrixTest_BiEllpack.cu             |    0
 .../{ => Legacy}/SparseMatrixTest_BiEllpack.h |    2 +-
 .../{ => Legacy}/SparseMatrixTest_CSR.cpp     |    0
 .../{ => Legacy}/SparseMatrixTest_CSR.cu      |    0
 .../{ => Legacy}/SparseMatrixTest_CSR.h       |    2 +-
 .../SparseMatrixTest_ChunkedEllpack.cpp       |    0
 .../SparseMatrixTest_ChunkedEllpack.cu        |    0
 .../SparseMatrixTest_ChunkedEllpack.h         |    2 +-
 .../{ => Legacy}/SparseMatrixTest_Ellpack.cpp |    0
 .../{ => Legacy}/SparseMatrixTest_Ellpack.cu  |    0
 .../{ => Legacy}/SparseMatrixTest_Ellpack.h   |    2 +-
 .../Legacy/SparseMatrixTest_SlicedEllpack.cpp |    1 +
 .../Legacy/SparseMatrixTest_SlicedEllpack.cu  |    1 +
 .../SparseMatrixTest_SlicedEllpack.h          |   66 +-
 .../SparseMatrixTest_SlicedEllpack.cpp        |    1 -
 .../SparseMatrixTest_SlicedEllpack.cu         |    1 -
 30 files changed, 2598 insertions(+), 91 deletions(-)
 create mode 100644 src/UnitTests/Matrices/Legacy/CMakeLists.txt
 create mode 100644 src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cpp
 create mode 100644 src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cu
 create mode 100644 src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h
 create mode 100644 src/UnitTests/Matrices/Legacy/SparseMatrixTest.cpp
 create mode 100644 src/UnitTests/Matrices/Legacy/SparseMatrixTest.cu
 create mode 100644 src/UnitTests/Matrices/Legacy/SparseMatrixTest.h
 create mode 100644 src/UnitTests/Matrices/Legacy/SparseMatrixTest.hpp
 rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_AdEllpack.cpp (100%)
 rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_AdEllpack.cu (100%)
 rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_AdEllpack.h (99%)
 rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_BiEllpack.cpp (100%)
 rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_BiEllpack.cu (100%)
 rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_BiEllpack.h (99%)
 rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_CSR.cpp (100%)
 rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_CSR.cu (100%)
 rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_CSR.h (99%)
 rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_ChunkedEllpack.cpp (100%)
 rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_ChunkedEllpack.cu (100%)
 rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_ChunkedEllpack.h (99%)
 rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_Ellpack.cpp (100%)
 rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_Ellpack.cu (100%)
 rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_Ellpack.h (99%)
 create mode 100644 src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cpp
 create mode 100644 src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cu
 rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_SlicedEllpack.h (53%)
 delete mode 100644 src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp
 delete mode 100644 src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu

diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h
index 1c110b1f1..76185bcac 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.h
+++ b/src/TNL/Containers/Segments/SlicedEllpack.h
@@ -10,6 +10,7 @@
 
 #pragma once
 
+#include <TNL/Allocators/Default.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/Segments/SlicedEllpackView.h>
 #include <TNL/Containers/Segments/EllpackSegmentView.h>
diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index 9b168bd56..668e272df 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -1,3 +1,5 @@
+ADD_SUBDIRECTORY( Legacy )
+
 IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
@@ -5,24 +7,6 @@ IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( SparseMatrixTest SparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
-   CUDA_ADD_EXECUTABLE( SparseMatrixTest_AdEllpack SparseMatrixTest_AdEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_AdEllpack ${GTEST_BOTH_LIBRARIES} )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixTest_BiEllpack SparseMatrixTest_BiEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_BiEllpack ${GTEST_BOTH_LIBRARIES} )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR SparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixTest_Ellpack SparseMatrixTest_Ellpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
-
    CUDA_ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
@@ -46,30 +30,6 @@ ELSE(  BUILD_CUDA )
    TARGET_COMPILE_OPTIONS( SparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
-   ADD_EXECUTABLE( SparseMatrixTest_AdEllpack SparseMatrixTest_AdEllpack.cpp )
-   TARGET_COMPILE_OPTIONS( SparseMatrixTest_AdEllpack PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_AdEllpack ${GTEST_BOTH_LIBRARIES} )
-
-   ADD_EXECUTABLE( SparseMatrixTest_BiEllpack SparseMatrixTest_BiEllpack.cpp )
-   TARGET_COMPILE_OPTIONS( SparseMatrixTest_BiEllpack PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_BiEllpack ${GTEST_BOTH_LIBRARIES} )
-
-   ADD_EXECUTABLE( SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cpp )
-   TARGET_COMPILE_OPTIONS( SparseMatrixTest_ChunkedEllpack PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} )
-
-   ADD_EXECUTABLE( SparseMatrixTest_CSR SparseMatrixTest_CSR.cpp )
-   TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} )
-
-   ADD_EXECUTABLE( SparseMatrixTest_Ellpack SparseMatrixTest_Ellpack.cpp )
-   TARGET_COMPILE_OPTIONS( SparseMatrixTest_Ellpack PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} )
-
-   ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cpp )
-   TARGET_COMPILE_OPTIONS( SparseMatrixTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
-
    ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp )
    TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} )
@@ -95,11 +55,6 @@ ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${C
 ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 # TODO: Uncomment the following when AdEllpack works
 #ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( SparseMatrixTest_BiEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_BiEllpack${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( SparseMatrixTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
 # TODO: DenseMatrixTest is not finished
 #ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 
diff --git a/src/UnitTests/Matrices/Legacy/CMakeLists.txt b/src/UnitTests/Matrices/Legacy/CMakeLists.txt
new file mode 100644
index 000000000..9cdfe2784
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/CMakeLists.txt
@@ -0,0 +1,72 @@
+IF( BUILD_CUDA )
+   CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest SparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_AdEllpack SparseMatrixTest_AdEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_AdEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_BiEllpack SparseMatrixTest_BiEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_BiEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_CSR SparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_Ellpack SparseMatrixTest_Ellpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
+
+ELSE(  BUILD_CUDA )
+   ADD_EXECUTABLE( Legacy_SparseMatrixCopyTest SparseMatrixCopyTest.cpp )
+   TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( Legacy_SparseMatrixTest SparseMatrixTest.cpp )
+   TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( Legacy_SparseMatrixTest_AdEllpack SparseMatrixTest_AdEllpack.cpp )
+   TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_AdEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_AdEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( Legacy_SparseMatrixTest_BiEllpack SparseMatrixTest_BiEllpack.cpp )
+   TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_BiEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_BiEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( Legacy_SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cpp )
+   TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_ChunkedEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( Legacy_SparseMatrixTest_CSR SparseMatrixTest_CSR.cpp )
+   TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( Legacy_SparseMatrixTest_Ellpack SparseMatrixTest_Ellpack.cpp )
+   TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_Ellpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( Legacy_SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cpp )
+   TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
+
+ENDIF( BUILD_CUDA )
+
+
+ADD_TEST( Legacy_SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
+# TODO: Uncomment the following when AdEllpack works
+#ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixTest_BiEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_BiEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+# TODO: DenseMatrixTest is not finished
+#ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cpp
new file mode 100644
index 000000000..30b8f64ec
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          SparseMatrixCopyTest.cpp  -  description
+                             -------------------
+    begin                : Jun 25, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "SparseMatrixCopyTest.h"
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cu
new file mode 100644
index 000000000..431fe481c
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          SparseMatrixCopyTest.cu  -  description
+                             -------------------
+    begin                : Jun 25, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "SparseMatrixCopyTest.h"
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h
new file mode 100644
index 000000000..7069fd777
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h
@@ -0,0 +1,573 @@
+/***************************************************************************
+                          SparseMatrixCopyTest.h -  description
+                             -------------------
+    begin                : Jun 25, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Matrices/Legacy/CSR.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
+
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Matrices/MatrixType.h>
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Containers/Segments/SlicedEllpack.h>
+
+/*using CSR_host = TNL::Matrices::CSR< int, TNL::Devices::Host, int >;
+using CSR_cuda = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >;
+using E_host = TNL::Matrices::Ellpack< int, TNL::Devices::Host, int >;
+using E_cuda = TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, int >;
+using SE_host = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, int, 2 >;
+using SE_cuda = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int, 2 >;*/
+
+template< typename Device, typename Index, typename IndexAllocator >
+using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >;
+
+template< typename Device, typename Index, typename IndexAllocator >
+using SlicedEllpackSegments = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >;
+
+using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >;
+using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >;
+using E_host   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
+using E_cuda   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
+using SE_host  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;
+using SE_cuda  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;
+
+
+#ifdef HAVE_GTEST 
+#include <gtest/gtest.h>
+
+/*
+ * Sets up the following 10x6 sparse matrix:
+ *
+ *    /  1  2             \
+ *    |           3  4  5 |
+ *    |  6  7  8          |
+ *    |     9 10 11 12 13 |
+ *    | 14 15 16 17 18    |
+ *    | 19 20             |
+ *    | 21                |
+ *    | 22                |
+ *    | 23 24 25 26 27    |
+ *    \                28 /
+ */
+template< typename Matrix >
+void setupUnevenRowSizeMatrix( Matrix& m )
+{
+    const int rows = 10;
+    const int cols = 6;
+    m.reset();
+    m.setDimensions( rows, cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( rows );
+    rowLengths.setValue( 5 );
+    rowLengths.setElement( 0, 2 );
+    rowLengths.setElement( 1,  3 );
+    rowLengths.setElement( 2,  3 );
+    rowLengths.setElement( 5,  2 );
+    rowLengths.setElement( 6,  1 );
+    rowLengths.setElement( 7,  1 );
+    rowLengths.setElement( 9,  1 );
+    m.setCompressedRowLengths( rowLengths );
+
+    int value = 1;
+    for( int i = 0; i < cols - 4; i++ )  // 0th row
+        m.setElement( 0, i, value++ );
+
+    for( int i = 3; i < cols; i++ )      // 1st row
+        m.setElement( 1, i, value++ );
+
+    for( int i = 0; i < cols - 3; i++ )  // 2nd row
+        m.setElement( 2, i, value++ );
+
+    for( int i = 1; i < cols; i++ )      // 3rd row
+        m.setElement( 3, i, value++ );
+
+    for( int i = 0; i < cols - 1; i++ )  // 4th row
+        m.setElement( 4, i, value++ );
+
+    for( int i = 0; i < cols - 4; i++ )  // 5th row
+        m.setElement( 5, i, value++ );
+
+    m.setElement( 6, 0, value++ );   // 6th row
+
+    m.setElement( 7, 0, value++ );   // 7th row
+
+    for( int i = 0; i < cols - 1; i++ )  // 8th row 
+        m.setElement( 8, i, value++ );
+
+    m.setElement( 9, 5, value++ );   // 9th row
+}
+
+template< typename Matrix >
+void checkUnevenRowSizeMatrix( Matrix& m )
+{
+   ASSERT_EQ( m.getRows(), 10 );
+   ASSERT_EQ( m.getColumns(), 6 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0);
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  3 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  5 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  9 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 11 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 12 );
+   EXPECT_EQ( m.getElement( 3, 5 ), 13 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 14 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 15 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 16 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 17 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 18 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 19 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 20 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 21 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+   
+   EXPECT_EQ( m.getElement( 7, 0 ), 22 );
+   EXPECT_EQ( m.getElement( 7, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 5 ),  0 );
+   
+   EXPECT_EQ( m.getElement( 8, 0 ), 23 );
+   EXPECT_EQ( m.getElement( 8, 1 ), 24 );
+   EXPECT_EQ( m.getElement( 8, 2 ), 25 );
+   EXPECT_EQ( m.getElement( 8, 3 ), 26 );
+   EXPECT_EQ( m.getElement( 8, 4 ), 27 );
+   EXPECT_EQ( m.getElement( 8, 5 ),  0 );
+   
+   EXPECT_EQ( m.getElement( 9, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 5 ), 28 );
+}
+
+/*
+ * Sets up the following 7x6 sparse matrix:
+ *
+ *    /              2  1 \
+ *    |           5  4  3 |
+ *    |        8  7  6    |
+ *    |    11 10  9       |
+ *    | 14 13 12          |
+ *    | 16 15             |
+ *    \ 17                /
+ */
+template< typename Matrix >
+void setupAntiTriDiagMatrix( Matrix& m )
+{
+    const int rows = 7;
+    const int cols = 6;
+    m.reset();
+    m.setDimensions( rows, cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( rows );
+    rowLengths.setValue( 3 );
+    rowLengths.setElement( 0, 4);
+    rowLengths.setElement( 1,  4 );
+    m.setCompressedRowLengths( rowLengths );
+    
+    int value = 1;
+    for( int i = 0; i < rows; i++ )
+        for( int j = cols - 1; j > 2; j-- )
+            if( j - i + 1 < cols && j - i + 1 >= 0 )
+                m.setElement( i, j - i + 1, value++ );
+}
+
+template< typename Matrix >
+void checkAntiTriDiagMatrix( Matrix& m )
+{
+   ASSERT_EQ( m.getRows(), 7 );
+   ASSERT_EQ( m.getColumns(), 6 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  1);
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  3 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 11 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  9 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 14 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 13 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 12 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 16 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 15 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 17 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+}
+
+/*
+ * Sets up the following 7x6 sparse matrix:
+ *
+ *    / 1  2             \
+ *    | 3  4  5          |
+ *    |    6  7  8       |
+ *    |       9 10 11    |
+ *    |         12 13 14 |
+ *    |            15 16 |
+ *    \               17 /
+ */
+template< typename Matrix >
+void setupTriDiagMatrix( Matrix& m )
+{
+   const int rows = 7;
+   const int cols = 6;
+   m.reset();
+   m.setDimensions( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setValue( 3 );
+   rowLengths.setElement( 0 , 4 );
+   rowLengths.setElement( 1,  4 );
+   m.setCompressedRowLengths( rowLengths );
+
+   int value = 1;
+   for( int i = 0; i < rows; i++ )
+      for( int j = 0; j < 3; j++ )
+         if( i + j - 1 >= 0 && i + j - 1 < cols )
+            m.setElement( i, i + j - 1, value++ );
+}
+
+template< typename Matrix >
+void checkTriDiagMatrix( Matrix& m )
+{
+   ASSERT_EQ( m.getRows(), 7 );
+   ASSERT_EQ( m.getColumns(), 6 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  3 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  9 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 11 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 12 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 13 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 14 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 15 );
+   EXPECT_EQ( m.getElement( 5, 5 ), 16 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 17 );
+}
+
+template< typename Matrix1, typename Matrix2 >
+void testCopyAssignment()
+{
+   {
+      SCOPED_TRACE("Tri Diagonal Matrix");
+
+      Matrix1 triDiag1;
+      setupTriDiagMatrix( triDiag1 );
+      checkTriDiagMatrix( triDiag1 );
+
+      Matrix2 triDiag2;
+      triDiag2 = triDiag1;
+      checkTriDiagMatrix( triDiag2 );
+   }
+   {
+      SCOPED_TRACE("Anti Tri Diagonal Matrix");
+      Matrix1 antiTriDiag1;
+      setupAntiTriDiagMatrix( antiTriDiag1 );
+      checkAntiTriDiagMatrix( antiTriDiag1 );
+
+      Matrix2 antiTriDiag2;
+      antiTriDiag2 = antiTriDiag1;
+      checkAntiTriDiagMatrix( antiTriDiag2 );
+   }
+   {
+      SCOPED_TRACE("Uneven Row Size Matrix");
+      Matrix1 unevenRowSize1;
+      setupUnevenRowSizeMatrix( unevenRowSize1 );
+      checkUnevenRowSizeMatrix( unevenRowSize1 );
+
+      Matrix2 unevenRowSize2;
+      unevenRowSize2 = unevenRowSize1;
+      checkUnevenRowSizeMatrix( unevenRowSize2 );
+   }
+}
+
+template< typename Matrix1, typename Matrix2 >
+void testConversion()
+{
+    
+   {
+        SCOPED_TRACE("Tri Diagonal Matrix");
+        
+        Matrix1 triDiag1;
+        setupTriDiagMatrix( triDiag1 );
+        checkTriDiagMatrix( triDiag1 );
+        
+        Matrix2 triDiag2;
+        //TNL::Matrices::copySparseMatrix( triDiag2, triDiag1 );
+        triDiag2 = triDiag1;
+        checkTriDiagMatrix( triDiag2 );
+   }
+   
+   {
+        SCOPED_TRACE("Anti Tri Diagonal Matrix");
+                
+        Matrix1 antiTriDiag1;
+        setupAntiTriDiagMatrix( antiTriDiag1 );
+        checkAntiTriDiagMatrix( antiTriDiag1 );
+        
+        Matrix2 antiTriDiag2;
+        //TNL::Matrices::copySparseMatrix( antiTriDiag2, antiTriDiag1 );
+        antiTriDiag2 = antiTriDiag1;
+        checkAntiTriDiagMatrix( antiTriDiag2 );
+   }
+   
+   {
+        SCOPED_TRACE("Uneven Row Size Matrix");
+        Matrix1 unevenRowSize1;
+        setupUnevenRowSizeMatrix( unevenRowSize1 );
+        checkUnevenRowSizeMatrix( unevenRowSize1 );
+        
+        Matrix2 unevenRowSize2;
+        //TNL::Matrices::copySparseMatrix( unevenRowSize2, unevenRowSize1 );
+        unevenRowSize2 = unevenRowSize1;
+        checkUnevenRowSizeMatrix( unevenRowSize2 );
+   }
+}
+
+TEST( SparseMatrixCopyTest, CSR_HostToHost )
+{
+   testCopyAssignment< CSR_host, CSR_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, CSR_HostToCuda )
+{
+   testCopyAssignment< CSR_host, CSR_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, CSR_CudaToHost )
+{
+   testCopyAssignment< CSR_cuda, CSR_host >();
+}
+
+TEST( SparseMatrixCopyTest, CSR_CudaToCuda )
+{
+   testCopyAssignment< CSR_cuda, CSR_cuda >();
+}
+#endif
+
+
+TEST( SparseMatrixCopyTest, Ellpack_HostToHost )
+{
+   testCopyAssignment< E_host, E_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, Ellpack_HostToCuda )
+{
+   testCopyAssignment< E_host, E_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, Ellpack_CudaToHost )
+{
+   testCopyAssignment< E_cuda, E_host >();
+}
+
+TEST( SparseMatrixCopyTest, Ellpack_CudaToCuda )
+{
+   testCopyAssignment< E_cuda, E_cuda >();
+}
+#endif
+
+
+TEST( SparseMatrixCopyTest, SlicedEllpack_HostToHost )
+{
+   testCopyAssignment< SE_host, SE_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, SlicedEllpack_HostToCuda )
+{
+   testCopyAssignment< SE_host, SE_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, SlicedEllpack_CudaToHost )
+{
+   testCopyAssignment< SE_cuda, SE_host >();
+}
+
+TEST( SparseMatrixCopyTest, SlicedEllpack_CudaToCuda )
+{
+   testCopyAssignment< SE_cuda, SE_cuda >();
+}
+#endif
+
+
+// test conversion between formats
+TEST( SparseMatrixCopyTest, CSR_to_Ellpack_host )
+{
+   testConversion< CSR_host, E_host >();
+}
+
+TEST( SparseMatrixCopyTest, Ellpack_to_CSR_host )
+{
+   testConversion< E_host, CSR_host >();
+}
+
+TEST( SparseMatrixCopyTest, CSR_to_SlicedEllpack_host )
+{
+   testConversion< CSR_host, SE_host >();
+}
+
+TEST( SparseMatrixCopyTest, SlicedEllpack_to_CSR_host )
+{
+   testConversion< SE_host, CSR_host >();
+}
+
+TEST( SparseMatrixCopyTest, Ellpack_to_SlicedEllpack_host )
+{
+   testConversion< E_host, SE_host >();
+}
+
+TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_host )
+{
+   testConversion< SE_host, E_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, CSR_to_Ellpack_cuda )
+{
+   testConversion< CSR_cuda, E_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, Ellpack_to_CSR_cuda )
+{
+   testConversion< E_cuda, CSR_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, CSR_to_SlicedEllpack_cuda )
+{
+   testConversion< CSR_cuda, SE_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, SlicedEllpack_to_CSR_cuda )
+{
+   testConversion< SE_cuda, CSR_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, Ellpack_to_SlicedEllpack_cuda )
+{
+   testConversion< E_cuda, SE_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda )
+{
+   testConversion< SE_cuda, E_cuda >();
+}
+#endif
+
+#endif
+
+#include "../../main.h"
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.cpp
new file mode 100644
index 000000000..46f6b9bd3
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest.cpp -  description
+                             -------------------
+    begin                : Nov 2, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "SparseMatrixTest.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.cu
new file mode 100644
index 000000000..01c23c193
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest.cu -  description
+                             -------------------
+    begin                : Nov 2, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "SparseMatrixTest.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.h
new file mode 100644
index 000000000..ed8bec796
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.h
@@ -0,0 +1,39 @@
+/***************************************************************************
+                          SparseMatrixTest.h -  description
+                             -------------------
+    begin                : Nov 2, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Matrices/Legacy/CSR.h>
+
+#include "SparseMatrixTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST 
+#include <gtest/gtest.h>
+
+using CSR_host_float = TNL::Matrices::CSR< float, TNL::Devices::Host, int >;
+using CSR_host_int = TNL::Matrices::CSR< int, TNL::Devices::Host, int >;
+
+using CSR_cuda_float = TNL::Matrices::CSR< float, TNL::Devices::Cuda, int >;
+using CSR_cuda_int = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >;
+
+TEST( SparseMatrixTest, CSR_perforSORIterationTest_Host )
+{
+    test_PerformSORIteration< CSR_host_float >();
+}
+
+#ifdef HAVE_CUDA
+TEST( SparseMatrixTest, CSR_perforSORIterationTest_Cuda )
+{
+   //    test_PerformSORIteration< CSR_cuda_float >();
+}
+#endif
+
+#endif
+
+#include "../../main.h"
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest.hpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.hpp
new file mode 100644
index 000000000..c6ff5cbd7
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.hpp
@@ -0,0 +1,1831 @@
+/***************************************************************************
+                          SparseMatrixTest_impl.h -  description
+                             -------------------
+    begin                : Nov 22, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Math.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <iostream>
+
+// Temporary, until test_OperatorEquals doesn't work for all formats.
+#include <TNL/Matrices/Legacy/ChunkedEllpack.h>
+#include <TNL/Matrices/Legacy/AdEllpack.h>
+#include <TNL/Matrices/Legacy/BiEllpack.h>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+template< typename MatrixHostFloat, typename MatrixHostInt >
+void host_test_GetType()
+{
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+}
+
+template< typename MatrixCudaFloat, typename MatrixCudaInt >
+void cuda_test_GetType()
+{
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+}
+
+template< typename Matrix >
+void test_SetDimensions()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+    const IndexType rows = 9;
+    const IndexType cols = 8;
+
+    Matrix m;
+    m.setDimensions( rows, cols );
+
+    EXPECT_EQ( m.getRows(), 9 );
+    EXPECT_EQ( m.getColumns(), 8 );
+}
+
+template< typename Matrix >
+void test_SetCompressedRowLengths()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+    const IndexType rows = 10;
+    const IndexType cols = 11;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( rows );
+    rowLengths.setValue( 3 );
+
+    IndexType rowLength = 1;
+    for( IndexType i = 2; i < rows; i++ )
+        rowLengths.setElement( i, rowLength++ );
+
+    m.setCompressedRowLengths( rowLengths );
+
+    // Insert values into the rows.
+    RealType value = 1;
+
+    for( IndexType i = 0; i < 3; i++ )      // 0th row
+        m.setElement( 0, i, value++ );
+
+    for( IndexType i = 0; i < 3; i++ )      // 1st row
+        m.setElement( 1, i, value++ );
+
+    for( IndexType i = 0; i < 1; i++ )      // 2nd row
+        m.setElement( 2, i, value++ );
+
+    for( IndexType i = 0; i < 2; i++ )      // 3rd row
+        m.setElement( 3, i, value++ );
+
+    for( IndexType i = 0; i < 3; i++ )      // 4th row
+        m.setElement( 4, i, value++ );
+
+    for( IndexType i = 0; i < 4; i++ )      // 5th row
+        m.setElement( 5, i, value++ );
+
+    for( IndexType i = 0; i < 5; i++ )      // 6th row
+        m.setElement( 6, i, value++ );
+
+    for( IndexType i = 0; i < 6; i++ )      // 7th row
+        m.setElement( 7, i, value++ );
+
+    for( IndexType i = 0; i < 7; i++ )      // 8th row
+        m.setElement( 8, i, value++ );
+
+    for( IndexType i = 0; i < 8; i++ )      // 9th row
+        m.setElement( 9, i, value++ );
+
+
+    EXPECT_EQ( m.getNonZeroRowLength( 0 ), 3 );
+    EXPECT_EQ( m.getNonZeroRowLength( 1 ), 3 );
+    EXPECT_EQ( m.getNonZeroRowLength( 2 ), 1 );
+    EXPECT_EQ( m.getNonZeroRowLength( 3 ), 2 );
+    EXPECT_EQ( m.getNonZeroRowLength( 4 ), 3 );
+    EXPECT_EQ( m.getNonZeroRowLength( 5 ), 4 );
+    EXPECT_EQ( m.getNonZeroRowLength( 6 ), 5 );
+    EXPECT_EQ( m.getNonZeroRowLength( 7 ), 6 );
+    EXPECT_EQ( m.getNonZeroRowLength( 8 ), 7 );
+    EXPECT_EQ( m.getNonZeroRowLength( 9 ), 8 );
+}
+
+template< typename Matrix1, typename Matrix2 >
+void test_SetLike()
+{
+    using RealType = typename Matrix1::RealType;
+    using DeviceType = typename Matrix1::DeviceType;
+    using IndexType = typename Matrix1::IndexType;
+
+    const IndexType rows = 8;
+    const IndexType cols = 7;
+
+    Matrix1 m1;
+    m1.reset();
+    m1.setDimensions( rows + 1, cols + 2 );
+
+    Matrix2 m2;
+    m2.reset();
+    m2.setDimensions( rows, cols );
+
+    m1.setLike( m2 );
+
+
+    EXPECT_EQ( m1.getRows(), m2.getRows() );
+    EXPECT_EQ( m1.getColumns(), m2.getColumns() );
+}
+
+template< typename Matrix >
+void test_GetNumberOfNonzeroMatrixElements()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  2  0  3  0  4  0  0  0  \
+    *    |  5  6  7  0  0  0  0  0  0  0  |
+    *    |  8  9 10 11 12 13 14 15  0  0  |
+    *    | 16 17  0  0  0  0  0  0  0  0  |
+    *    | 18  0  0  0  0  0  0  0  0  0  |
+    *    | 19  0  0  0  0  0  0  0  0  0  |
+    *    | 20  0  0  0  0  0  0  0  0  0  |
+    *    | 21  0  0  0  0  0  0  0  0  0  |
+    *    | 22 23 24 25 26 27 28 29 30 31  |
+    *    \ 32 33 34 35 36 37 38 39 40 41 /
+    */
+
+   const IndexType rows = 10;
+   const IndexType cols = 10;
+
+   Matrix m;
+   m.reset();
+
+   m.setDimensions( rows, cols );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setElement( 0, 4 );
+   rowLengths.setElement( 1, 3 );
+   rowLengths.setElement( 2, 8 );
+   rowLengths.setElement( 3, 2 );
+   for( IndexType i = 4; i < rows - 2; i++ )
+   {
+      rowLengths.setElement( i, 1 );
+   }
+   rowLengths.setElement( 8, 10 );
+   rowLengths.setElement( 9, 10 );
+   m.setCompressedRowLengths( rowLengths );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < 4; i++ )
+      m.setElement( 0, 2 * i, value++ );
+
+   for( IndexType i = 0; i < 3; i++ )
+      m.setElement( 1, i, value++ );
+
+   for( IndexType i = 0; i < 8; i++ )
+      m.setElement( 2, i, value++ );
+
+   for( IndexType i = 0; i < 2; i++ )
+      m.setElement( 3, i, value++ );
+
+   for( IndexType i = 4; i < 8; i++ )
+      m.setElement( i, 0, value++ );
+
+   for( IndexType j = 8; j < rows; j++)
+   {
+      for( IndexType i = 0; i < cols; i++ )
+         m.setElement( j, i, value++ );
+   }
+
+   EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 );
+}
+
+template< typename Matrix >
+void test_Reset()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+/*
+ * Sets up the following 5x4 sparse matrix:
+ *
+ *    /  0  0  0  0 \
+ *    |  0  0  0  0 |
+ *    |  0  0  0  0 |
+ *    |  0  0  0  0 |
+ *    \  0  0  0  0 /
+ */
+
+    const IndexType rows = 5;
+    const IndexType cols = 4;
+
+    Matrix m;
+    m.setDimensions( rows, cols );
+
+    m.reset();
+
+
+    EXPECT_EQ( m.getRows(), 0 );
+    EXPECT_EQ( m.getColumns(), 0 );
+}
+
+template< typename Matrix >
+void test_GetRow()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+/*
+ * Sets up the following 10x10 sparse matrix:
+ *
+ *    /  1  0  2  0  3  0  4  0  0  0  \
+ *    |  5  6  7  0  0  0  0  0  0  0  |
+ *    |  8  9 10 11 12 13 14 15  0  0  |
+ *    | 16 17  0  0  0  0  0  0  0  0  |
+ *    | 18  0  0  0  0  0  0  0  0  0  |
+ *    | 19  0  0  0  0  0  0  0  0  0  |
+ *    | 20  0  0  0  0  0  0  0  0  0  |
+ *    | 21  0  0  0  0  0  0  0  0  0  |
+ *    | 22 23 24 25 26 27 28 29 30 31  |
+ *    \ 32 33 34 35 36 37 38 39 40 41 /
+ */
+
+    const IndexType rows = 10;
+    const IndexType cols = 10;
+
+    Matrix m( rows, cols );
+
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( rows );
+    rowLengths.setElement( 0, 4 );
+    rowLengths.setElement( 1, 3 );
+    rowLengths.setElement( 2, 8 );
+    rowLengths.setElement( 3, 2 );
+    for( IndexType i = 4; i < rows - 2; i++ )
+    {
+        rowLengths.setElement( i, 1 );
+    }
+    rowLengths.setElement( 8, 10 );
+    rowLengths.setElement( 9, 10 );
+    m.setCompressedRowLengths( rowLengths );
+
+    /*RealType value = 1;
+    for( IndexType i = 0; i < 4; i++ )
+        m.setElement( 0, 2 * i, value++ );
+
+    for( IndexType i = 0; i < 3; i++ )
+        m.setElement( 1, i, value++ );
+
+    for( IndexType i = 0; i < 8; i++ )
+        m.setElement( 2, i, value++ );
+
+    for( IndexType i = 0; i < 2; i++ )
+        m.setElement( 3, i, value++ );
+
+    for( IndexType i = 4; i < 8; i++ )
+        m.setElement( i, 0, value++ );
+
+    for( IndexType j = 8; j < rows; j++)
+    {
+        for( IndexType i = 0; i < cols; i++ )
+            m.setElement( j, i, value++ );
+    }*/
+    auto matrixView = m.getView();
+    auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
+       auto row = matrixView.getRow( rowIdx );
+       RealType val;
+       switch( rowIdx )
+       {
+          case 0:
+            val = 1;
+            for( IndexType i = 0; i < 4; i++ )
+               row.setElement( i, 2 * i, val++ );
+            break;
+         case 1:
+            val = 5;
+            for( IndexType i = 0; i < 3; i++ )
+               row.setElement( i, i, val++ );
+            break;
+         case 2:
+            val = 8;
+            for( IndexType i = 0; i < 8; i++ )
+               row.setElement( i, i, val++ );
+            break;
+         case 3:
+            val = 16;
+            for( IndexType i = 0; i < 2; i++ )
+               row.setElement( i, i, val++ );
+            break;
+         case 4:
+            row.setElement( 0, 0, 18 );
+            break;
+         case 5:
+            row.setElement( 0, 0, 19 );
+            break;
+         case 6:
+            row.setElement( 0, 0, 20 );
+            break;
+         case 7:
+            row.setElement( 0, 0, 21 );
+            break;
+         case 8:
+             val = 22;
+             for( IndexType i = 0; i < rows; i++ )
+                row.setElement( i, i, val++ );
+             break;
+         case 9:
+             val = 32;
+             for( IndexType i = 0; i < rows; i++ )
+                row.setElement( i, i, val++ );
+             break;
+       }
+    };
+    TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f );
+
+    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  2 );
+    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 4 ),  3 );
+    EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 6 ),  4 );
+    EXPECT_EQ( m.getElement( 0, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ),  5 );
+    EXPECT_EQ( m.getElement( 1, 1 ),  6 );
+    EXPECT_EQ( m.getElement( 1, 2 ),  7 );
+    EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ),  8 );
+    EXPECT_EQ( m.getElement( 2, 1 ),  9 );
+    EXPECT_EQ( m.getElement( 2, 2 ), 10 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 11 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 12 );
+    EXPECT_EQ( m.getElement( 2, 5 ), 13 );
+    EXPECT_EQ( m.getElement( 2, 6 ), 14 );
+    EXPECT_EQ( m.getElement( 2, 7 ), 15 );
+    EXPECT_EQ( m.getElement( 2, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 2, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+    EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+    EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 18 );
+    EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ), 19 );
+    EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 6, 0 ), 20 );
+    EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 7, 0 ), 21 );
+    EXPECT_EQ( m.getElement( 7, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 8, 0 ), 22 );
+    EXPECT_EQ( m.getElement( 8, 1 ), 23 );
+    EXPECT_EQ( m.getElement( 8, 2 ), 24 );
+    EXPECT_EQ( m.getElement( 8, 3 ), 25 );
+    EXPECT_EQ( m.getElement( 8, 4 ), 26 );
+    EXPECT_EQ( m.getElement( 8, 5 ), 27 );
+    EXPECT_EQ( m.getElement( 8, 6 ), 28 );
+    EXPECT_EQ( m.getElement( 8, 7 ), 29 );
+    EXPECT_EQ( m.getElement( 8, 8 ), 30 );
+    EXPECT_EQ( m.getElement( 8, 9 ), 31 );
+
+    EXPECT_EQ( m.getElement( 9, 0 ), 32 );
+    EXPECT_EQ( m.getElement( 9, 1 ), 33 );
+    EXPECT_EQ( m.getElement( 9, 2 ), 34 );
+    EXPECT_EQ( m.getElement( 9, 3 ), 35 );
+    EXPECT_EQ( m.getElement( 9, 4 ), 36 );
+    EXPECT_EQ( m.getElement( 9, 5 ), 37 );
+    EXPECT_EQ( m.getElement( 9, 6 ), 38 );
+    EXPECT_EQ( m.getElement( 9, 7 ), 39 );
+    EXPECT_EQ( m.getElement( 9, 8 ), 40 );
+    EXPECT_EQ( m.getElement( 9, 9 ), 41 );
+}
+
+
+template< typename Matrix >
+void test_SetElement()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+/*
+ * Sets up the following 10x10 sparse matrix:
+ *
+ *    /  1  0  2  0  3  0  4  0  0  0  \
+ *    |  5  6  7  0  0  0  0  0  0  0  |
+ *    |  8  9 10 11 12 13 14 15  0  0  |
+ *    | 16 17  0  0  0  0  0  0  0  0  |
+ *    | 18  0  0  0  0  0  0  0  0  0  |
+ *    | 19  0  0  0  0  0  0  0  0  0  |
+ *    | 20  0  0  0  0  0  0  0  0  0  |
+ *    | 21  0  0  0  0  0  0  0  0  0  |
+ *    | 22 23 24 25 26 27 28 29 30 31  |
+ *    \ 32 33 34 35 36 37 38 39 40 41 /
+ */
+
+    const IndexType rows = 10;
+    const IndexType cols = 10;
+
+    Matrix m;
+    m.reset();
+
+    m.setDimensions( rows, cols );
+
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( rows );
+    rowLengths.setElement( 0, 4 );
+    rowLengths.setElement( 1, 3 );
+    rowLengths.setElement( 2, 8 );
+    rowLengths.setElement( 3, 2 );
+    for( IndexType i = 4; i < rows - 2; i++ )
+    {
+        rowLengths.setElement( i, 1 );
+    }
+    rowLengths.setElement( 8, 10 );
+    rowLengths.setElement( 9, 10 );
+    m.setCompressedRowLengths( rowLengths );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < 4; i++ )
+        m.setElement( 0, 2 * i, value++ );
+
+    for( IndexType i = 0; i < 3; i++ )
+        m.setElement( 1, i, value++ );
+
+    for( IndexType i = 0; i < 8; i++ )
+        m.setElement( 2, i, value++ );
+
+    for( IndexType i = 0; i < 2; i++ )
+        m.setElement( 3, i, value++ );
+
+    for( IndexType i = 4; i < 8; i++ )
+        m.setElement( i, 0, value++ );
+
+    for( IndexType j = 8; j < rows; j++)
+    {
+        for( IndexType i = 0; i < cols; i++ )
+            m.setElement( j, i, value++ );
+    }
+
+    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  2 );
+    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 4 ),  3 );
+    EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 6 ),  4 );
+    EXPECT_EQ( m.getElement( 0, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ),  5 );
+    EXPECT_EQ( m.getElement( 1, 1 ),  6 );
+    EXPECT_EQ( m.getElement( 1, 2 ),  7 );
+    EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ),  8 );
+    EXPECT_EQ( m.getElement( 2, 1 ),  9 );
+    EXPECT_EQ( m.getElement( 2, 2 ), 10 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 11 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 12 );
+    EXPECT_EQ( m.getElement( 2, 5 ), 13 );
+    EXPECT_EQ( m.getElement( 2, 6 ), 14 );
+    EXPECT_EQ( m.getElement( 2, 7 ), 15 );
+    EXPECT_EQ( m.getElement( 2, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 2, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+    EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+    EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 18 );
+    EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ), 19 );
+    EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 6, 0 ), 20 );
+    EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 7, 0 ), 21 );
+    EXPECT_EQ( m.getElement( 7, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 8, 0 ), 22 );
+    EXPECT_EQ( m.getElement( 8, 1 ), 23 );
+    EXPECT_EQ( m.getElement( 8, 2 ), 24 );
+    EXPECT_EQ( m.getElement( 8, 3 ), 25 );
+    EXPECT_EQ( m.getElement( 8, 4 ), 26 );
+    EXPECT_EQ( m.getElement( 8, 5 ), 27 );
+    EXPECT_EQ( m.getElement( 8, 6 ), 28 );
+    EXPECT_EQ( m.getElement( 8, 7 ), 29 );
+    EXPECT_EQ( m.getElement( 8, 8 ), 30 );
+    EXPECT_EQ( m.getElement( 8, 9 ), 31 );
+
+    EXPECT_EQ( m.getElement( 9, 0 ), 32 );
+    EXPECT_EQ( m.getElement( 9, 1 ), 33 );
+    EXPECT_EQ( m.getElement( 9, 2 ), 34 );
+    EXPECT_EQ( m.getElement( 9, 3 ), 35 );
+    EXPECT_EQ( m.getElement( 9, 4 ), 36 );
+    EXPECT_EQ( m.getElement( 9, 5 ), 37 );
+    EXPECT_EQ( m.getElement( 9, 6 ), 38 );
+    EXPECT_EQ( m.getElement( 9, 7 ), 39 );
+    EXPECT_EQ( m.getElement( 9, 8 ), 40 );
+    EXPECT_EQ( m.getElement( 9, 9 ), 41 );
+}
+
+template< typename Matrix >
+void test_AddElement()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+/*
+ * Sets up the following 6x5 sparse matrix:
+ *
+ *    /  1  2  3  0  0 \
+ *    |  0  4  5  6  0 |
+ *    |  0  0  7  8  9 |
+ *    | 10  0  0  0  0 |
+ *    |  0 11  0  0  0 |
+ *    \  0  0  0 12  0 /
+ */
+
+    const IndexType rows = 6;
+    const IndexType cols = 5;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( rows );
+    rowLengths.setValue( 3 );
+    m.setCompressedRowLengths( rowLengths );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < cols - 2; i++ )     // 0th row
+        m.setElement( 0, i, value++ );
+
+    for( IndexType i = 1; i < cols - 1; i++ )     // 1st row
+        m.setElement( 1, i, value++ );
+
+    for( IndexType i = 2; i < cols; i++ )         // 2nd row
+        m.setElement( 2, i, value++ );
+
+    m.setElement( 3, 0, value++ );      // 3rd row
+
+    m.setElement( 4, 1, value++ );      // 4th row
+
+    m.setElement( 5, 3, value++ );      // 5th row
+
+
+    // Check the set elements
+    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  3 );
+    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 1 ),  4 );
+    EXPECT_EQ( m.getElement( 1, 2 ),  5 );
+    EXPECT_EQ( m.getElement( 1, 3 ),  6 );
+    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+    EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 2, 2 ),  7 );
+    EXPECT_EQ( m.getElement( 2, 3 ),  8 );
+    EXPECT_EQ( m.getElement( 2, 4 ),  9 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 10 );
+    EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 1 ), 11 );
+    EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 3 ), 12 );
+    EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+
+    // Add new elements to the old elements with a multiplying factor applied to the old elements.
+
+/*
+ * Sets up the following 6x5 sparse matrix:
+ *
+ *    /  1  2  3  0  0 \
+ *    |  0  4  5  6  0 |
+ *    |  0  0  7  8  9 |
+ *    | 10  0  0  0  0 |
+ *    |  0 11  0  0  0 |
+ *    \  0  0  0 12  0 /
+ */
+
+/*
+ * The following setup results in the following 6x5 sparse matrix:
+ *
+ *    /  3  6  9  0  0 \
+ *    |  0 12 15 18  0 |
+ *    |  0  0 21 24 27 |
+ *    | 30 11 12  0  0 |
+ *    |  0 35 14 15  0 |
+ *    \  0  0 16 41 18 /
+ */
+
+    RealType newValue = 1;
+    for( IndexType i = 0; i < cols - 2; i++ )         // 0th row
+        m.addElement( 0, i, newValue++, 2.0 );
+
+    for( IndexType i = 1; i < cols - 1; i++ )         // 1st row
+        m.addElement( 1, i, newValue++, 2.0 );
+
+    for( IndexType i = 2; i < cols; i++ )             // 2nd row
+        m.addElement( 2, i, newValue++, 2.0 );
+
+    for( IndexType i = 0; i < cols - 2; i++ )         // 3rd row
+        m.addElement( 3, i, newValue++, 2.0 );
+
+    for( IndexType i = 1; i < cols - 1; i++ )         // 4th row
+        m.addElement( 4, i, newValue++, 2.0 );
+
+    for( IndexType i = 2; i < cols; i++ )             // 5th row
+        m.addElement( 5, i, newValue++, 2.0 );
+
+
+    EXPECT_EQ( m.getElement( 0, 0 ),  3 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  6 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  9 );
+    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 1 ), 12 );
+    EXPECT_EQ( m.getElement( 1, 2 ), 15 );
+    EXPECT_EQ( m.getElement( 1, 3 ), 18 );
+    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+    EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 2, 2 ), 21 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 24 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 27 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 30 );
+    EXPECT_EQ( m.getElement( 3, 1 ), 11 );
+    EXPECT_EQ( m.getElement( 3, 2 ), 12 );
+    EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 1 ), 35 );
+    EXPECT_EQ( m.getElement( 4, 2 ), 14 );
+    EXPECT_EQ( m.getElement( 4, 3 ), 15 );
+    EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 2 ), 16 );
+    EXPECT_EQ( m.getElement( 5, 3 ), 41 );
+    EXPECT_EQ( m.getElement( 5, 4 ), 18 );
+}
+
+template< typename Matrix >
+void test_SetRow()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+/*
+ * Sets up the following 3x7 sparse matrix:
+ *
+ *    /  0  0  0  1  1  1  0 \
+ *    |  2  2  2  0  0  0  0 |
+ *    \  3  3  3  0  0  0  0 /
+ */
+
+    const IndexType rows = 3;
+    const IndexType cols = 7;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( rows );
+    rowLengths.setValue( 6 );
+    rowLengths.setElement( 1, 3 );
+    m.setCompressedRowLengths( rowLengths );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < 3; i++ )
+    {
+        m.setElement( 0, i + 3, value );
+        m.setElement( 1, i, value + 1 );
+        m.setElement( 2, i, value + 2 );
+    }
+
+    RealType row1 [ 3 ] = { 11, 11, 11 }; IndexType colIndexes1 [ 3 ] = { 0, 1, 2 };
+    RealType row2 [ 3 ] = { 22, 22, 22 }; IndexType colIndexes2 [ 3 ] = { 0, 1, 2 };
+    RealType row3 [ 3 ] = { 33, 33, 33 }; IndexType colIndexes3 [ 3 ] = { 3, 4, 5 };
+
+    RealType row = 0;
+    IndexType elements = 3;
+
+    m.setRow( row++, colIndexes1, row1, elements );
+    m.setRow( row++, colIndexes2, row2, elements );
+    m.setRow( row++, colIndexes3, row3, elements );
+
+
+    EXPECT_EQ( m.getElement( 0, 0 ), 11 );
+    EXPECT_EQ( m.getElement( 0, 1 ), 11 );
+    EXPECT_EQ( m.getElement( 0, 2 ), 11 );
+    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 6 ),  0 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ), 22 );
+    EXPECT_EQ( m.getElement( 1, 1 ), 22 );
+    EXPECT_EQ( m.getElement( 1, 2 ), 22 );
+    EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 6 ),  0 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+    EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 2, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 33 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 33 );
+    EXPECT_EQ( m.getElement( 2, 5 ), 33 );
+    EXPECT_EQ( m.getElement( 2, 6 ),  0 );
+}
+
+template< typename Matrix >
+void test_VectorProduct()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+    using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
+
+/*
+ * Sets up the following 4x4 sparse matrix:
+ *
+ *    /  1  0  0  0 \
+ *    |  0  2  0  3 |
+ *    |  0  4  0  0 |
+ *    \  0  0  5  0 /
+ */
+
+    const IndexType m_rows_1 = 4;
+    const IndexType m_cols_1 = 4;
+
+    Matrix m_1;
+    m_1.reset();
+    m_1.setDimensions( m_rows_1, m_cols_1 );
+    typename Matrix::CompressedRowLengthsVector rowLengths_1;
+    rowLengths_1.setSize( m_rows_1 );
+    rowLengths_1.setElement( 0, 1 );
+    rowLengths_1.setElement( 1, 2 );
+    rowLengths_1.setElement( 2, 1 );
+    rowLengths_1.setElement( 3, 1 );
+    m_1.setCompressedRowLengths( rowLengths_1 );
+
+    RealType value_1 = 1;
+    m_1.setElement( 0, 0, value_1++ );      // 0th row
+
+    m_1.setElement( 1, 1, value_1++ );      // 1st row
+    m_1.setElement( 1, 3, value_1++ );
+
+    m_1.setElement( 2, 1, value_1++ );      // 2nd row
+
+    m_1.setElement( 3, 2, value_1++ );      // 3rd row
+
+    VectorType inVector_1;
+    inVector_1.setSize( m_cols_1 );
+    for( IndexType i = 0; i < inVector_1.getSize(); i++ )
+        inVector_1.setElement( i, 2 );
+
+    VectorType outVector_1;
+    outVector_1.setSize( m_rows_1 );
+    for( IndexType j = 0; j < outVector_1.getSize(); j++ )
+        outVector_1.setElement( j, 0 );
+
+
+    m_1.vectorProduct( inVector_1, outVector_1 );
+
+
+    EXPECT_EQ( outVector_1.getElement( 0 ),  2 );
+    EXPECT_EQ( outVector_1.getElement( 1 ), 10 );
+    EXPECT_EQ( outVector_1.getElement( 2 ),  8 );
+    EXPECT_EQ( outVector_1.getElement( 3 ), 10 );
+
+
+/*
+ * Sets up the following 4x4 sparse matrix:
+ *
+ *    /  1  2  3  0 \
+ *    |  0  0  0  4 |
+ *    |  5  6  7  0 |
+ *    \  0  8  0  0 /
+ */
+
+    const IndexType m_rows_2 = 4;
+    const IndexType m_cols_2 = 4;
+
+    Matrix m_2;
+    m_2.reset();
+    m_2.setDimensions( m_rows_2, m_cols_2 );
+    typename Matrix::CompressedRowLengthsVector rowLengths_2;
+    rowLengths_2.setSize( m_rows_2 );
+    rowLengths_2.setValue( 3 );
+    rowLengths_2.setElement( 1, 1 );
+    rowLengths_2.setElement( 3, 1 );
+    m_2.setCompressedRowLengths( rowLengths_2 );
+
+    RealType value_2 = 1;
+    for( IndexType i = 0; i < 3; i++ )   // 0th row
+        m_2.setElement( 0, i, value_2++ );
+
+    m_2.setElement( 1, 3, value_2++ );      // 1st row
+
+    for( IndexType i = 0; i < 3; i++ )   // 2nd row
+        m_2.setElement( 2, i, value_2++ );
+
+    for( IndexType i = 1; i < 2; i++ )       // 3rd row
+        m_2.setElement( 3, i, value_2++ );
+
+    VectorType inVector_2;
+    inVector_2.setSize( m_cols_2 );
+    for( IndexType i = 0; i < inVector_2.getSize(); i++ )
+        inVector_2.setElement( i, 2 );
+
+    VectorType outVector_2;
+    outVector_2.setSize( m_rows_2 );
+    for( IndexType j = 0; j < outVector_2.getSize(); j++ )
+        outVector_2.setElement( j, 0 );
+
+
+    m_2.vectorProduct( inVector_2, outVector_2 );
+
+
+    EXPECT_EQ( outVector_2.getElement( 0 ), 12 );
+    EXPECT_EQ( outVector_2.getElement( 1 ),  8 );
+    EXPECT_EQ( outVector_2.getElement( 2 ), 36 );
+    EXPECT_EQ( outVector_2.getElement( 3 ), 16 );
+
+
+/*
+ * Sets up the following 4x4 sparse matrix:
+ *
+ *    /  1  2  3  0 \
+ *    |  0  4  5  6 |
+ *    |  7  8  9  0 |
+ *    \  0 10 11 12 /
+ */
+
+    const IndexType m_rows_3 = 4;
+    const IndexType m_cols_3 = 4;
+
+    Matrix m_3;
+    m_3.reset();
+    m_3.setDimensions( m_rows_3, m_cols_3 );
+    typename Matrix::CompressedRowLengthsVector rowLengths_3;
+    rowLengths_3.setSize( m_rows_3 );
+    rowLengths_3.setValue( 3 );
+    m_3.setCompressedRowLengths( rowLengths_3 );
+
+    RealType value_3 = 1;
+    for( IndexType i = 0; i < 3; i++ )          // 0th row
+        m_3.setElement( 0, i, value_3++ );
+
+    for( IndexType i = 1; i < 4; i++ )
+        m_3.setElement( 1, i, value_3++ );      // 1st row
+
+    for( IndexType i = 0; i < 3; i++ )          // 2nd row
+        m_3.setElement( 2, i, value_3++ );
+
+    for( IndexType i = 1; i < 4; i++ )          // 3rd row
+        m_3.setElement( 3, i, value_3++ );
+
+    VectorType inVector_3;
+    inVector_3.setSize( m_cols_3 );
+    for( IndexType i = 0; i < inVector_3.getSize(); i++ )
+        inVector_3.setElement( i, 2 );
+
+    VectorType outVector_3;
+    outVector_3.setSize( m_rows_3 );
+    for( IndexType j = 0; j < outVector_3.getSize(); j++ )
+        outVector_3.setElement( j, 0 );
+
+
+    m_3.vectorProduct( inVector_3, outVector_3 );
+
+
+    EXPECT_EQ( outVector_3.getElement( 0 ), 12 );
+    EXPECT_EQ( outVector_3.getElement( 1 ), 30 );
+    EXPECT_EQ( outVector_3.getElement( 2 ), 48 );
+    EXPECT_EQ( outVector_3.getElement( 3 ), 66 );
+
+
+/*
+ * Sets up the following 8x8 sparse matrix:
+ *
+ *    /  1  2  3  0  0  4  0  0 \
+ *    |  0  5  6  7  8  0  0  0 |
+ *    |  9 10 11 12 13  0  0  0 |
+ *    |  0 14 15 16 17  0  0  0 |
+ *    |  0  0 18 19 20 21  0  0 |
+ *    |  0  0  0 22 23 24 25  0 |
+ *    | 26 27 28 29 30  0  0  0 |
+ *    \ 31 32 33 34 35  0  0  0 /
+ */
+
+    const IndexType m_rows_4 = 8;
+    const IndexType m_cols_4 = 8;
+
+    Matrix m_4;
+    m_4.reset();
+    m_4.setDimensions( m_rows_4, m_cols_4 );
+    typename Matrix::CompressedRowLengthsVector rowLengths_4;
+    rowLengths_4.setSize( m_rows_4 );
+    rowLengths_4.setValue( 4 );
+    rowLengths_4.setElement( 2, 5 );
+    rowLengths_4.setElement( 6, 5 );
+    rowLengths_4.setElement( 7, 5 );
+    m_4.setCompressedRowLengths( rowLengths_4 );
+
+    RealType value_4 = 1;
+    for( IndexType i = 0; i < 3; i++ )       // 0th row
+        m_4.setElement( 0, i, value_4++ );
+
+    m_4.setElement( 0, 5, value_4++ );
+
+    for( IndexType i = 1; i < 5; i++ )       // 1st row
+        m_4.setElement( 1, i, value_4++ );
+
+    for( IndexType i = 0; i < 5; i++ )       // 2nd row
+        m_4.setElement( 2, i, value_4++ );
+
+    for( IndexType i = 1; i < 5; i++ )       // 3rd row
+        m_4.setElement( 3, i, value_4++ );
+
+    for( IndexType i = 2; i < 6; i++ )       // 4th row
+        m_4.setElement( 4, i, value_4++ );
+
+    for( IndexType i = 3; i < 7; i++ )       // 5th row
+        m_4.setElement( 5, i, value_4++ );
+
+    for( IndexType i = 0; i < 5; i++ )       // 6th row
+        m_4.setElement( 6, i, value_4++ );
+
+    for( IndexType i = 0; i < 5; i++ )       // 7th row
+        m_4.setElement( 7, i, value_4++ );
+
+    VectorType inVector_4;
+    inVector_4.setSize( m_cols_4 );
+    for( IndexType i = 0; i < inVector_4.getSize(); i++ )
+        inVector_4.setElement( i, 2 );
+
+    VectorType outVector_4;
+    outVector_4.setSize( m_rows_4 );
+    for( IndexType j = 0; j < outVector_4.getSize(); j++ )
+        outVector_4.setElement( j, 0 );
+
+
+    m_4.vectorProduct( inVector_4, outVector_4 );
+
+
+    EXPECT_EQ( outVector_4.getElement( 0 ),  20 );
+    EXPECT_EQ( outVector_4.getElement( 1 ),  52 );
+    EXPECT_EQ( outVector_4.getElement( 2 ), 110 );
+    EXPECT_EQ( outVector_4.getElement( 3 ), 124 );
+    EXPECT_EQ( outVector_4.getElement( 4 ), 156 );
+    EXPECT_EQ( outVector_4.getElement( 5 ), 188 );
+    EXPECT_EQ( outVector_4.getElement( 6 ), 280 );
+    EXPECT_EQ( outVector_4.getElement( 7 ), 330 );
+
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  2  3  0  4  5  0  1 \   6
+    *    |  0  6  0  7  0  0  0  1 |   3
+    *    |  0  8  9  0 10  0  0  1 |   4
+    *    |  0 11 12 13 14  0  0  1 |   5
+    *    |  0 15  0  0  0  0  0  1 |   2
+    *    |  0 16 17 18 19 20 21  1 |   7
+    *    | 22 23 24 25 26 27 28  1 |   8
+    *    \ 29 30 31 32 33 34 35 36 /   8
+    */
+
+    const IndexType m_rows_5 = 8;
+    const IndexType m_cols_5 = 8;
+
+    Matrix m_5;
+    m_5.reset();
+    m_5.setDimensions( m_rows_5, m_cols_5 );
+    typename Matrix::CompressedRowLengthsVector rowLengths_5;
+    rowLengths_5.setSize( m_rows_5 );
+    rowLengths_5.setElement(0, 6);
+    rowLengths_5.setElement(1, 3);
+    rowLengths_5.setElement(2, 4);
+    rowLengths_5.setElement(3, 5);
+    rowLengths_5.setElement(4, 2);
+    rowLengths_5.setElement(5, 7);
+    rowLengths_5.setElement(6, 8);
+    rowLengths_5.setElement(7, 8);
+    m_5.setCompressedRowLengths( rowLengths_5 );
+
+    RealType value_5 = 1;
+    for( IndexType i = 0; i < 3; i++ )   // 0th row
+        m_5.setElement( 0, i, value_5++ );
+
+    m_5.setElement( 0, 4, value_5++ );           // 0th row
+    m_5.setElement( 0, 5, value_5++ );
+
+    m_5.setElement( 1, 1, value_5++ );           // 1st row
+    m_5.setElement( 1, 3, value_5++ );
+
+    for( IndexType i = 1; i < 3; i++ )            // 2nd row
+        m_5.setElement( 2, i, value_5++ );
+
+    m_5.setElement( 2, 4, value_5++ );           // 2nd row
+
+    for( IndexType i = 1; i < 5; i++ )            // 3rd row
+        m_5.setElement( 3, i, value_5++ );
+
+    m_5.setElement( 4, 1, value_5++ );           // 4th row
+
+    for( IndexType i = 1; i < 7; i++ )            // 5th row
+        m_5.setElement( 5, i, value_5++ );
+
+    for( IndexType i = 0; i < 7; i++ )            // 6th row
+        m_5.setElement( 6, i, value_5++ );
+
+    for( IndexType i = 0; i < 8; i++ )            // 7th row
+        m_5.setElement( 7, i, value_5++ );
+
+    for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
+        m_5.setElement( i, 7, 1);
+
+    VectorType inVector_5;
+    inVector_5.setSize( m_cols_5 );
+    for( IndexType i = 0; i < inVector_5.getSize(); i++ )
+        inVector_5.setElement( i, 2 );
+
+    VectorType outVector_5;
+    outVector_5.setSize( m_rows_5 );
+    for( IndexType j = 0; j < outVector_5.getSize(); j++ )
+        outVector_5.setElement( j, 0 );
+
+    m_5.vectorProduct( inVector_5, outVector_5 );
+
+    EXPECT_EQ( outVector_5.getElement( 0 ),  32 );
+    EXPECT_EQ( outVector_5.getElement( 1 ),  28 );
+    EXPECT_EQ( outVector_5.getElement( 2 ),  56 );
+    EXPECT_EQ( outVector_5.getElement( 3 ), 102 );
+    EXPECT_EQ( outVector_5.getElement( 4 ),  32 );
+    EXPECT_EQ( outVector_5.getElement( 5 ), 224 );
+    EXPECT_EQ( outVector_5.getElement( 6 ), 352 );
+    EXPECT_EQ( outVector_5.getElement( 7 ), 520 );
+}
+
+template< typename Matrix >
+void test_RowsReduction()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  2  3  0  4  5  0  1 \   6
+    *    |  0  6  0  7  0  0  0  1 |   3
+    *    |  0  8  9  0 10  0  0  1 |   4
+    *    |  0 11 12 13 14  0  0  1 |   5
+    *    |  0 15  0  0  0  0  0  1 |   2
+    *    |  0 16 17 18 19 20 21  1 |   7
+    *    | 22 23 24 25 26 27 28  1 |   8
+    *    \ 29 30 31 32 33 34 35 36 /   8
+    */
+
+   const IndexType rows = 8;
+   const IndexType cols = 8;
+
+   Matrix m;
+   m.setDimensions( rows, cols );
+   typename Matrix::RowsCapacitiesType rowsCapacities( rows );
+   //rowLengths.setSize( rows );
+   rowsCapacities.setElement(0, 6);
+   rowsCapacities.setElement(1, 3);
+   rowsCapacities.setElement(2, 4);
+   rowsCapacities.setElement(3, 5);
+   rowsCapacities.setElement(4, 2);
+   rowsCapacities.setElement(5, 7);
+   rowsCapacities.setElement(6, 8);
+   rowsCapacities.setElement(7, 8);
+   m.setCompressedRowLengths( rowsCapacities );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m.setElement( 0, i, value++ );
+
+   m.setElement( 0, 4, value++ );           // 0th row
+   m.setElement( 0, 5, value++ );
+
+   m.setElement( 1, 1, value++ );           // 1st row
+   m.setElement( 1, 3, value++ );
+
+   for( IndexType i = 1; i < 3; i++ )            // 2nd row
+      m.setElement( 2, i, value++ );
+
+   m.setElement( 2, 4, value++ );           // 2nd row
+
+   for( IndexType i = 1; i < 5; i++ )            // 3rd row
+      m.setElement( 3, i, value++ );
+
+   m.setElement( 4, 1, value++ );           // 4th row
+
+   for( IndexType i = 1; i < 7; i++ )            // 5th row
+      m.setElement( 5, i, value++ );
+
+   for( IndexType i = 0; i < 7; i++ )            // 6th row
+      m.setElement( 6, i, value++ );
+
+   for( IndexType i = 0; i < 8; i++ )            // 7th row
+       m.setElement( 7, i, value++ );
+
+   for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
+      m.setElement( i, 7, 1);
+
+   ////
+   // Compute number of non-zero elements in rows.
+   typename Matrix::RowsCapacitiesType rowLengths( rows );
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   m.allRowsReduction( fetch, reduce, keep, 0 );
+   EXPECT_EQ( rowsCapacities, rowLengths );
+   m.getCompressedRowLengths( rowLengths );
+   EXPECT_EQ( rowsCapacities, rowLengths );
+
+   ////
+   // Compute max norm
+   TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows );
+   auto rowSums_view = rowSums.getView();
+   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return abs( value );
+   };
+   auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowSums_view[ rowIdx ] = value;
+   };
+   m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 );
+   const RealType maxNorm = TNL::max( rowSums );
+   EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36
+}
+
+template< typename Matrix >
+void test_PerformSORIteration()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+/*
+ * Sets up the following 4x4 sparse matrix:
+ *
+ *    /  4  1  0  0 \
+ *    |  1  4  1  0 |
+ *    |  0  1  4  1 |
+ *    \  0  0  1  4 /
+ */
+
+    const IndexType m_rows = 4;
+    const IndexType m_cols = 4;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( m_rows, m_cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( m_rows );
+    rowLengths.setValue( 3 );
+    m.setCompressedRowLengths( rowLengths );
+
+    m.setElement( 0, 0, 4.0 );        // 0th row
+    m.setElement( 0, 1, 1.0);
+
+    m.setElement( 1, 0, 1.0 );        // 1st row
+    m.setElement( 1, 1, 4.0 );
+    m.setElement( 1, 2, 1.0 );
+
+    m.setElement( 2, 1, 1.0 );        // 2nd row
+    m.setElement( 2, 2, 4.0 );
+    m.setElement( 2, 3, 1.0 );
+
+    m.setElement( 3, 2, 1.0 );        // 3rd row
+    m.setElement( 3, 3, 4.0 );
+
+    RealType bVector [ 4 ] = { 1, 1, 1, 1 };
+    RealType xVector [ 4 ] = { 1, 1, 1, 1 };
+
+    IndexType row = 0;
+    RealType omega = 1;
+
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], 0.0 );
+    EXPECT_EQ( xVector[ 1 ], 1.0 );
+    EXPECT_EQ( xVector[ 2 ], 1.0 );
+    EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], 0.0 );
+    EXPECT_EQ( xVector[ 1 ], 0.0 );
+    EXPECT_EQ( xVector[ 2 ], 1.0 );
+    EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], 0.0 );
+    EXPECT_EQ( xVector[ 1 ], 0.0 );
+    EXPECT_EQ( xVector[ 2 ], 0.0 );
+    EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], 0.0 );
+    EXPECT_EQ( xVector[ 1 ], 0.0 );
+    EXPECT_EQ( xVector[ 2 ], 0.0 );
+    EXPECT_EQ( xVector[ 3 ], 0.25 );
+}
+
+// This test is only for AdEllpack
+template< typename Matrix >
+void test_OperatorEquals()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   if( std::is_same< DeviceType, TNL::Devices::Cuda >::value )
+       return;
+   else
+   {
+       using AdELL_host = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Host, IndexType >;
+       using AdELL_cuda = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Cuda, IndexType >;
+
+       /*
+        * Sets up the following 8x8 sparse matrix:
+        *
+        *    /  1  2  3  0  4  5  0  1 \   6
+        *    |  0  6  0  7  0  0  0  1 |   3
+        *    |  0  8  9  0 10  0  0  1 |   4
+        *    |  0 11 12 13 14  0  0  1 |   5
+        *    |  0 15  0  0  0  0  0  1 |   2
+        *    |  0 16 17 18 19 20 21  1 |   7
+        *    | 22 23 24 25 26 27 28  1 |   8
+        *    \ 29 30 31 32 33 34 35 36 /   8
+        */
+
+        const IndexType m_rows = 8;
+        const IndexType m_cols = 8;
+
+        AdELL_host m_host;
+
+        m_host.reset();
+        m_host.setDimensions( m_rows, m_cols );
+        typename AdELL_host::CompressedRowLengthsVector rowLengths;
+        rowLengths.setSize( m_rows );
+        rowLengths.setElement(0, 6);
+        rowLengths.setElement(1, 3);
+        rowLengths.setElement(2, 4);
+        rowLengths.setElement(3, 5);
+        rowLengths.setElement(4, 2);
+        rowLengths.setElement(5, 7);
+        rowLengths.setElement(6, 8);
+        rowLengths.setElement(7, 8);
+        m_host.setCompressedRowLengths( rowLengths );
+
+        RealType value = 1;
+        for( IndexType i = 0; i < 3; i++ )   // 0th row
+            m_host.setElement( 0, i, value++ );
+
+        m_host.setElement( 0, 4, value++ );           // 0th row
+        m_host.setElement( 0, 5, value++ );
+
+        m_host.setElement( 1, 1, value++ );           // 1st row
+        m_host.setElement( 1, 3, value++ );
+
+        for( IndexType i = 1; i < 3; i++ )            // 2nd row
+            m_host.setElement( 2, i, value++ );
+
+        m_host.setElement( 2, 4, value++ );           // 2nd row
+
+
+        for( IndexType i = 1; i < 5; i++ )            // 3rd row
+            m_host.setElement( 3, i, value++ );
+
+        m_host.setElement( 4, 1, value++ );           // 4th row
+
+        for( IndexType i = 1; i < 7; i++ )            // 5th row
+            m_host.setElement( 5, i, value++ );
+
+        for( IndexType i = 0; i < 7; i++ )            // 6th row
+            m_host.setElement( 6, i, value++ );
+
+        for( IndexType i = 0; i < 8; i++ )            // 7th row
+            m_host.setElement( 7, i, value++ );
+
+        for( IndexType i = 0; i < 7; i++ )            // 1s at the end or rows: 5, 6
+            m_host.setElement( i, 7, 1);
+
+        EXPECT_EQ( m_host.getElement( 0, 0 ),  1 );
+        EXPECT_EQ( m_host.getElement( 0, 1 ),  2 );
+        EXPECT_EQ( m_host.getElement( 0, 2 ),  3 );
+        EXPECT_EQ( m_host.getElement( 0, 3 ),  0 );
+        EXPECT_EQ( m_host.getElement( 0, 4 ),  4 );
+        EXPECT_EQ( m_host.getElement( 0, 5 ),  5 );
+        EXPECT_EQ( m_host.getElement( 0, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 0, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 1, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 1 ),  6 );
+        EXPECT_EQ( m_host.getElement( 1, 2 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 3 ),  7 );
+        EXPECT_EQ( m_host.getElement( 1, 4 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 5 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 2, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 2, 1 ),  8 );
+        EXPECT_EQ( m_host.getElement( 2, 2 ),  9 );
+        EXPECT_EQ( m_host.getElement( 2, 3 ),  0 );
+        EXPECT_EQ( m_host.getElement( 2, 4 ), 10 );
+        EXPECT_EQ( m_host.getElement( 2, 5 ),  0 );
+        EXPECT_EQ( m_host.getElement( 2, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 2, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 3, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 3, 1 ), 11 );
+        EXPECT_EQ( m_host.getElement( 3, 2 ), 12 );
+        EXPECT_EQ( m_host.getElement( 3, 3 ), 13 );
+        EXPECT_EQ( m_host.getElement( 3, 4 ), 14 );
+        EXPECT_EQ( m_host.getElement( 3, 5 ),  0 );
+        EXPECT_EQ( m_host.getElement( 3, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 3, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 4, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 1 ), 15 );
+        EXPECT_EQ( m_host.getElement( 4, 2 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 3 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 4 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 5 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 5, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 5, 1 ), 16 );
+        EXPECT_EQ( m_host.getElement( 5, 2 ), 17 );
+        EXPECT_EQ( m_host.getElement( 5, 3 ), 18 );
+        EXPECT_EQ( m_host.getElement( 5, 4 ), 19 );
+        EXPECT_EQ( m_host.getElement( 5, 5 ), 20 );
+        EXPECT_EQ( m_host.getElement( 5, 6 ), 21 );
+        EXPECT_EQ( m_host.getElement( 5, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 6, 0 ), 22 );
+        EXPECT_EQ( m_host.getElement( 6, 1 ), 23 );
+        EXPECT_EQ( m_host.getElement( 6, 2 ), 24 );
+        EXPECT_EQ( m_host.getElement( 6, 3 ), 25 );
+        EXPECT_EQ( m_host.getElement( 6, 4 ), 26 );
+        EXPECT_EQ( m_host.getElement( 6, 5 ), 27 );
+        EXPECT_EQ( m_host.getElement( 6, 6 ), 28 );
+        EXPECT_EQ( m_host.getElement( 6, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 7, 0 ), 29 );
+        EXPECT_EQ( m_host.getElement( 7, 1 ), 30 );
+        EXPECT_EQ( m_host.getElement( 7, 2 ), 31 );
+        EXPECT_EQ( m_host.getElement( 7, 3 ), 32 );
+        EXPECT_EQ( m_host.getElement( 7, 4 ), 33 );
+        EXPECT_EQ( m_host.getElement( 7, 5 ), 34 );
+        EXPECT_EQ( m_host.getElement( 7, 6 ), 35 );
+        EXPECT_EQ( m_host.getElement( 7, 7 ), 36 );
+
+        AdELL_cuda m_cuda;
+
+        // Copy the host matrix into the cuda matrix
+        m_cuda = m_host;
+
+        // Reset the host matrix
+        m_host.reset();
+
+        // Copy the cuda matrix back into the host matrix
+        m_host = m_cuda;
+
+        // Check the newly created double-copy host matrix
+        EXPECT_EQ( m_host.getElement( 0, 0 ),  1 );
+        EXPECT_EQ( m_host.getElement( 0, 1 ),  2 );
+        EXPECT_EQ( m_host.getElement( 0, 2 ),  3 );
+        EXPECT_EQ( m_host.getElement( 0, 3 ),  0 );
+        EXPECT_EQ( m_host.getElement( 0, 4 ),  4 );
+        EXPECT_EQ( m_host.getElement( 0, 5 ),  5 );
+        EXPECT_EQ( m_host.getElement( 0, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 0, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 1, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 1 ),  6 );
+        EXPECT_EQ( m_host.getElement( 1, 2 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 3 ),  7 );
+        EXPECT_EQ( m_host.getElement( 1, 4 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 5 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 2, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 2, 1 ),  8 );
+        EXPECT_EQ( m_host.getElement( 2, 2 ),  9 );
+        EXPECT_EQ( m_host.getElement( 2, 3 ),  0 );
+        EXPECT_EQ( m_host.getElement( 2, 4 ), 10 );
+        EXPECT_EQ( m_host.getElement( 2, 5 ),  0 );
+        EXPECT_EQ( m_host.getElement( 2, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 2, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 3, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 3, 1 ), 11 );
+        EXPECT_EQ( m_host.getElement( 3, 2 ), 12 );
+        EXPECT_EQ( m_host.getElement( 3, 3 ), 13 );
+        EXPECT_EQ( m_host.getElement( 3, 4 ), 14 );
+        EXPECT_EQ( m_host.getElement( 3, 5 ),  0 );
+        EXPECT_EQ( m_host.getElement( 3, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 3, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 4, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 1 ), 15 );
+        EXPECT_EQ( m_host.getElement( 4, 2 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 3 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 4 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 5 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 5, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 5, 1 ), 16 );
+        EXPECT_EQ( m_host.getElement( 5, 2 ), 17 );
+        EXPECT_EQ( m_host.getElement( 5, 3 ), 18 );
+        EXPECT_EQ( m_host.getElement( 5, 4 ), 19 );
+        EXPECT_EQ( m_host.getElement( 5, 5 ), 20 );
+        EXPECT_EQ( m_host.getElement( 5, 6 ), 21 );
+        EXPECT_EQ( m_host.getElement( 5, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 6, 0 ), 22 );
+        EXPECT_EQ( m_host.getElement( 6, 1 ), 23 );
+        EXPECT_EQ( m_host.getElement( 6, 2 ), 24 );
+        EXPECT_EQ( m_host.getElement( 6, 3 ), 25 );
+        EXPECT_EQ( m_host.getElement( 6, 4 ), 26 );
+        EXPECT_EQ( m_host.getElement( 6, 5 ), 27 );
+        EXPECT_EQ( m_host.getElement( 6, 6 ), 28 );
+        EXPECT_EQ( m_host.getElement( 6, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 7, 0 ), 29 );
+        EXPECT_EQ( m_host.getElement( 7, 1 ), 30 );
+        EXPECT_EQ( m_host.getElement( 7, 2 ), 31 );
+        EXPECT_EQ( m_host.getElement( 7, 3 ), 32 );
+        EXPECT_EQ( m_host.getElement( 7, 4 ), 33 );
+        EXPECT_EQ( m_host.getElement( 7, 5 ), 34 );
+        EXPECT_EQ( m_host.getElement( 7, 6 ), 35 );
+        EXPECT_EQ( m_host.getElement( 7, 7 ), 36 );
+
+        // Try vectorProduct with copied cuda matrix to see if it works correctly.
+        using VectorType = TNL::Containers::Vector< RealType, TNL::Devices::Cuda, IndexType >;
+
+        VectorType inVector;
+        inVector.setSize( m_cols );
+        for( IndexType i = 0; i < inVector.getSize(); i++ )
+            inVector.setElement( i, 2 );
+
+        VectorType outVector;
+        outVector.setSize( m_rows );
+        for( IndexType j = 0; j < outVector.getSize(); j++ )
+            outVector.setElement( j, 0 );
+
+        m_cuda.vectorProduct( inVector, outVector );
+
+        EXPECT_EQ( outVector.getElement( 0 ),  32 );
+        EXPECT_EQ( outVector.getElement( 1 ),  28 );
+        EXPECT_EQ( outVector.getElement( 2 ),  56 );
+        EXPECT_EQ( outVector.getElement( 3 ), 102 );
+        EXPECT_EQ( outVector.getElement( 4 ),  32 );
+        EXPECT_EQ( outVector.getElement( 5 ), 224 );
+        EXPECT_EQ( outVector.getElement( 6 ), 352 );
+        EXPECT_EQ( outVector.getElement( 7 ), 520 );
+   }
+}
+
+template< typename Matrix >
+void test_SaveAndLoad( const char* filename )
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  0  4  0  5 |
+    *    |  6  7  8  0 |
+    *    \  0  9 10 11 /
+    */
+
+    const IndexType m_rows = 4;
+    const IndexType m_cols = 4;
+
+    Matrix savedMatrix;
+    savedMatrix.reset();
+    savedMatrix.setDimensions( m_rows, m_cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( m_rows );
+    rowLengths.setValue( 3 );
+    savedMatrix.setCompressedRowLengths( rowLengths );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
+        savedMatrix.setElement( 0, i, value++ );
+
+    savedMatrix.setElement( 1, 1, value++ );
+    savedMatrix.setElement( 1, 3, value++ );      // 1st row
+
+    for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
+        savedMatrix.setElement( 2, i, value++ );
+
+    for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
+        savedMatrix.setElement( 3, i, value++ );
+
+    ASSERT_NO_THROW( savedMatrix.save( filename ) );
+
+    Matrix loadedMatrix;
+    loadedMatrix.reset();
+    loadedMatrix.setDimensions( m_rows, m_cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths2;
+    rowLengths2.setSize( m_rows );
+    rowLengths2.setValue( 3 );
+    loadedMatrix.setCompressedRowLengths( rowLengths2 );
+
+
+    ASSERT_NO_THROW( loadedMatrix.load( filename ) );
+
+
+    EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
+    EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
+    EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
+    EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
+
+    EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
+    EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
+    EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
+    EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
+
+    EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
+    EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
+    EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
+    EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
+
+    EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
+    EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
+    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
+    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
+
+    EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
+    EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
+    EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
+    EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
+
+    EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  0 );
+    EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  4 );
+    EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  0 );
+    EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  5 );
+
+    EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  6 );
+    EXPECT_EQ( savedMatrix.getElement( 2, 1 ),  7 );
+    EXPECT_EQ( savedMatrix.getElement( 2, 2 ),  8 );
+    EXPECT_EQ( savedMatrix.getElement( 2, 3 ),  0 );
+
+    EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
+    EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  9 );
+    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 );
+    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 );
+
+    EXPECT_EQ( std::remove( filename ), 0 );
+}
+
+template< typename Matrix >
+void test_Print()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+/*
+ * Sets up the following 5x4 sparse matrix:
+ *
+ *    /  1  2  3  0 \
+ *    |  0  0  0  4 |
+ *    |  5  6  7  0 |
+ *    |  0  8  9 10 |
+ *    \  0  0 11 12 /
+ */
+
+    const IndexType m_rows = 5;
+    const IndexType m_cols = 4;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( m_rows, m_cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( m_rows );
+    rowLengths.setValue( 3 );
+    m.setCompressedRowLengths( rowLengths );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
+        m.setElement( 0, i, value++ );
+
+    m.setElement( 1, 3, value++ );      // 1st row
+
+    for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
+        m.setElement( 2, i, value++ );
+
+    for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
+        m.setElement( 3, i, value++ );
+
+    for( IndexType i = 2; i < m_cols; i++ )       // 4th row
+        m.setElement( 4, i, value++ );
+
+    #include <sstream>
+    std::stringstream printed;
+    std::stringstream couted;
+
+    //change the underlying buffer and save the old buffer
+    auto old_buf = std::cout.rdbuf(printed.rdbuf());
+
+    m.print( std::cout ); //all the std::cout goes to ss
+
+    std::cout.rdbuf(old_buf); //reset
+
+    couted << "Row: 0 ->  Col:0->1	 Col:1->2	 Col:2->3\t\n"
+               "Row: 1 ->  Col:3->4\t\n"
+               "Row: 2 ->  Col:0->5	 Col:1->6	 Col:2->7\t\n"
+               "Row: 3 ->  Col:1->8	 Col:2->9	 Col:3->10\t\n"
+               "Row: 4 ->  Col:2->11	 Col:3->12\t\n";
+
+
+    EXPECT_EQ( printed.str(), couted.str() );
+}
+
+#endif
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.cpp
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.cpp
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.cpp
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.cu
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.cu
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.cu
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h
similarity index 99%
rename from src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h
index 2169b96df..d2d268dac 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h
@@ -133,4 +133,4 @@ TYPED_TEST( AdEllpackMatrixTest, printTest )
 #endif
 
 
-#include "../main.h"
+#include "../../main.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.cpp
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.cpp
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.cpp
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.cu
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.cu
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.cu
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h
similarity index 99%
rename from src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h
index c74fa635f..9dab63c1a 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h
@@ -142,4 +142,4 @@ TYPED_TEST( BiEllpackMatrixTest, printTest )
 }
 #endif // HAVE_GTEST
 
-#include "../main.h"
+#include "../../main.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cpp
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cpp
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cu
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_CSR.cu
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cu
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h
similarity index 99%
rename from src/UnitTests/Matrices/SparseMatrixTest_CSR.h
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h
index c9dfc770f..3cae12e3a 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h
@@ -136,4 +136,4 @@ TYPED_TEST( CSRMatrixTest, printTest )
 
 #endif
 
-#include "../main.h"
+#include "../../main.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.cpp
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cpp
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.cpp
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.cu
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cu
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.cu
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h
similarity index 99%
rename from src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h
index 45801fa3a..a3c049910 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h
@@ -144,4 +144,4 @@ TYPED_TEST( ChunkedEllpackMatrixTest, printTest )
 
 #endif
 
-#include "../main.h"
+#include "../../main.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cpp
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cpp
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cu
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cu
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h
similarity index 99%
rename from src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h
index 26d270a3d..fa6b2027c 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h
@@ -136,4 +136,4 @@ TYPED_TEST( EllpackMatrixTest, printTest )
 
 #endif
 
-#include "../main.h"
+#include "../../main.h"
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cpp
new file mode 100644
index 000000000..40e2e94b8
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cpp
@@ -0,0 +1 @@
+#include "SparseMatrixTest_SlicedEllpack.h"
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cu
new file mode 100644
index 000000000..40e2e94b8
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cu
@@ -0,0 +1 @@
+#include "SparseMatrixTest_SlicedEllpack.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h
similarity index 53%
rename from src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h
index 00184754c..7f5ad546f 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          SparseMatrixTest_SlicedEllpack_segments.h -  description
+                          SparseMatrixTest_SlicedEllpack.h -  description
                              -------------------
     begin                : Dec 9, 2019
     copyright            : (C) 2019 by Tomas Oberhuber et al.
@@ -8,8 +8,7 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Containers/Segments/SlicedEllpack.h>
-#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
 
 
 #include "SparseMatrixTest.hpp"
@@ -26,45 +25,38 @@ protected:
    using SlicedEllpackMatrixType = Matrix;
 };
 
-////
-// Row-major format is used for the host system
-template< typename Device, typename Index >
-using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, true, 32 >;
+template< typename Real, typename Device, typename Index >
+using SlicedEllpackType = TNL::Matrices::SlicedEllpack< Real, Device, Index, 32 >;
 
 
-////
-// Column-major format is used for GPUs
-template< typename Device, typename Index >
-using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, false, 32 >;
-
 // types for which MatrixTest is instantiated
 using SlicedEllpackMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::SparseMatrix< int,     RowMajorSlicedEllpack, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< long,    RowMajorSlicedEllpack, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< float,   RowMajorSlicedEllpack, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< double,  RowMajorSlicedEllpack, TNL::Devices::Host, short >,
-    TNL::Matrices::SparseMatrix< int,     RowMajorSlicedEllpack, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< long,    RowMajorSlicedEllpack, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< float,   RowMajorSlicedEllpack, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< double,  RowMajorSlicedEllpack, TNL::Devices::Host, int   >,
-    TNL::Matrices::SparseMatrix< int,     RowMajorSlicedEllpack, TNL::Devices::Host, long  >,
-    TNL::Matrices::SparseMatrix< long,    RowMajorSlicedEllpack, TNL::Devices::Host, long  >,
-    TNL::Matrices::SparseMatrix< float,   RowMajorSlicedEllpack, TNL::Devices::Host, long  >,
-    TNL::Matrices::SparseMatrix< double,  RowMajorSlicedEllpack, TNL::Devices::Host, long  >
+    SlicedEllpackType< int,     TNL::Devices::Host, short >,
+    SlicedEllpackType< long,    TNL::Devices::Host, short >,
+    SlicedEllpackType< float,   TNL::Devices::Host, short >,
+    SlicedEllpackType< double,  TNL::Devices::Host, short >,
+    SlicedEllpackType< int,     TNL::Devices::Host, int   >,
+    SlicedEllpackType< long,    TNL::Devices::Host, int   >,
+    SlicedEllpackType< float,   TNL::Devices::Host, int   >,
+    SlicedEllpackType< double,  TNL::Devices::Host, int   >,
+    SlicedEllpackType< int,     TNL::Devices::Host, long  >,
+    SlicedEllpackType< long,    TNL::Devices::Host, long  >,
+    SlicedEllpackType< float,   TNL::Devices::Host, long  >,
+    SlicedEllpackType< double,  TNL::Devices::Host, long  >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::SparseMatrix< int,     ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< long,    ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< float,   ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< double,  ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SparseMatrix< int,     ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< long,    ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< float,   ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< double,  ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int   >,
-    TNL::Matrices::SparseMatrix< int,     ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long  >,
-    TNL::Matrices::SparseMatrix< long,    ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long  >,
-    TNL::Matrices::SparseMatrix< float,   ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long  >,
-    TNL::Matrices::SparseMatrix< double,  ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long  >
+   ,SlicedEllpackType< int,     TNL::Devices::Cuda, short >,
+    SlicedEllpackType< long,    TNL::Devices::Cuda, short >,
+    SlicedEllpackType< float,   TNL::Devices::Cuda, short >,
+    SlicedEllpackType< double,  TNL::Devices::Cuda, short >,
+    SlicedEllpackType< int,     TNL::Devices::Cuda, int   >,
+    SlicedEllpackType< long,    TNL::Devices::Cuda, int   >,
+    SlicedEllpackType< float,   TNL::Devices::Cuda, int   >,
+    SlicedEllpackType< double,  TNL::Devices::Cuda, int   >,
+    SlicedEllpackType< int,     TNL::Devices::Cuda, long  >,
+    SlicedEllpackType< long,    TNL::Devices::Cuda, long  >,
+    SlicedEllpackType< float,   TNL::Devices::Cuda, long  >,
+    SlicedEllpackType< double,  TNL::Devices::Cuda, long  >
 #endif
 >;
 
@@ -149,4 +141,4 @@ TYPED_TEST( SlicedEllpackMatrixTest, printTest )
 
 #endif
 
-#include "../main.h"
+#include "../../main.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp
deleted file mode 100644
index a88301100..000000000
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "SparseMatrixTest_SlicedEllpack_segments.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu
deleted file mode 100644
index a88301100..000000000
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu
+++ /dev/null
@@ -1 +0,0 @@
-#include "SparseMatrixTest_SlicedEllpack_segments.h"
-- 
GitLab


From b4cadcda32bc05fd62f5e5119abdda95e1305f67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 2 Jan 2020 17:17:49 +0100
Subject: [PATCH 050/179] Deleting deprecated methods in SparseMatrix and
 SparseMatrixView.

---
 src/TNL/Matrices/Matrix.h                     |  18 +-
 src/TNL/Matrices/MatrixView.h                 |  11 -
 src/TNL/Matrices/SparseMatrix.h               |  77 +------
 src/TNL/Matrices/SparseMatrix.hpp             | 211 ------------------
 src/TNL/Matrices/SparseMatrixView.h           |  59 +----
 src/TNL/Matrices/SparseMatrixView.hpp         | 175 ---------------
 src/UnitTests/Matrices/SparseMatrixTest.hpp   |  72 ------
 .../Matrices/SparseMatrixTest_CSR_segments.h  |   7 -
 .../SparseMatrixTest_Ellpack_segments.h       |   7 -
 .../SparseMatrixTest_SlicedEllpack_segments.h |   7 -
 10 files changed, 14 insertions(+), 630 deletions(-)

diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index 66a686046..30031da42 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -56,6 +56,7 @@ public:
 
    virtual void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) = 0;
 
+   [[deprecated]]
    virtual IndexType getRowLength( const IndexType row ) const = 0;
 
    // TODO: implementation is not parallel
@@ -79,12 +80,6 @@ public:
    __cuda_callable__
    IndexType getColumns() const;
 
-   /****
-    * TODO: The fast variants of the following methods cannot be virtual.
-    * If they were, they could not be used in the CUDA kernels. If CUDA allows it
-    * in the future and it does not slow down, declare them as virtual here.
-    */
-
    virtual bool setElement( const IndexType row,
                             const IndexType column,
                             const RealType& value ) = 0;
@@ -94,17 +89,6 @@ public:
                             const RealType& value,
                             const RealType& thisElementMultiplicator = 1.0 ) = 0;
 
-   virtual bool setRow( const IndexType row,
-                        const IndexType* columns,
-                        const RealType* values,
-                        const IndexType numberOfElements ) = 0;
-
-   virtual bool addRow( const IndexType row,
-                        const IndexType* columns,
-                        const RealType* values,
-                        const IndexType numberOfElements,
-                        const RealType& thisElementMultiplicator = 1.0 ) = 0;
-
    virtual Real getElement( const IndexType row,
                             const IndexType column ) const = 0;
 
diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h
index 18a9fb488..5a3cde478 100644
--- a/src/TNL/Matrices/MatrixView.h
+++ b/src/TNL/Matrices/MatrixView.h
@@ -90,17 +90,6 @@ public:
                             const RealType& value,
                             const RealType& thisElementMultiplicator = 1.0 ) = 0;
 
-   virtual bool setRow( const IndexType row,
-                        const IndexType* columns,
-                        const RealType* values,
-                        const IndexType numberOfElements ) = 0;
-
-   virtual bool addRow( const IndexType row,
-                        const IndexType* columns,
-                        const RealType* values,
-                        const IndexType numberOfElements,
-                        const RealType& thisElementMultiplicator = 1.0 ) = 0;
-
    virtual Real getElement( const IndexType row,
                             const IndexType column ) const = 0;
 
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 8169f89f2..a64c80934 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -91,18 +91,17 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       template< typename Vector >
       void getCompressedRowLengths( Vector& rowLengths ) const;
 
-      IndexType getRowLength( const IndexType row ) const;
-
-      __cuda_callable__
-      IndexType getRowLengthFast( const IndexType row ) const;
-
-      IndexType getNonZeroRowLength( const IndexType row ) const;
-
-      __cuda_callable__
-      IndexType getNonZeroRowLengthFast( const IndexType row ) const;
-
-      template< typename Real2, typename Device2, typename Index2, typename MatrixType2, template< typename, typename, typename > class Segments2, typename RealAllocator2, typename IndexAllocator2 >
-      void setLike( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >& matrix );
+      [[deprecated]]
+      virtual IndexType getRowLength( const IndexType row ) const {};
+
+      template< typename Real_,
+                typename Device_,
+                typename Index_,
+                typename MatrixType_,
+                template< typename, typename, typename > class Segments_,
+                typename RealAllocator_,
+                typename IndexAllocator_ >
+      void setLike( const SparseMatrix< Real_, Device_, Index_, MatrixType_, Segments_, RealAllocator_, IndexAllocator_ >& matrix );
 
       IndexType getNumberOfNonzeroMatrixElements() const;
 
@@ -114,68 +113,18 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       __cuda_callable__
       RowView getRow( const IndexType& rowIdx );
 
-      [[deprecated("")]] __cuda_callable__
-      bool setElementFast( const IndexType row,
-                           const IndexType column,
-                           const RealType& value );
-
       bool setElement( const IndexType row,
                        const IndexType column,
                        const RealType& value );
 
-      [[deprecated("")]] __cuda_callable__
-      bool addElementFast( const IndexType row,
-                           const IndexType column,
-                           const RealType& value,
-                           const RealType& thisElementMultiplicator = 1.0 );
-
-      [[deprecated("")]]
       bool addElement( const IndexType row,
                        const IndexType column,
                        const RealType& value,
-                       const RealType& thisElementMultiplicator = 1.0 );
-
-
-      [[deprecated("")]] __cuda_callable__
-      bool setRowFast( const IndexType row,
-                       const IndexType* columnIndexes,
-                       const RealType* values,
-                       const IndexType elements );
-
-      [[deprecated("")]] 
-      bool setRow( const IndexType row,
-                   const IndexType* columnIndexes,
-                   const RealType* values,
-                   const IndexType elements );
-
-
-      [[deprecated("")]] __cuda_callable__
-      bool addRowFast( const IndexType row,
-                       const IndexType* columns,
-                       const RealType* values,
-                       const IndexType numberOfElements,
-                       const RealType& thisElementMultiplicator = 1.0 );
-
-      [[deprecated("")]] 
-      bool addRow( const IndexType row,
-                   const IndexType* columns,
-                   const RealType* values,
-                   const IndexType numberOfElements,
-                   const RealType& thisElementMultiplicator = 1.0 );
-
-
-      [[deprecated("")]] __cuda_callable__
-      RealType getElementFast( const IndexType row,
-                               const IndexType column ) const;
+                       const RealType& thisElementMultiplicator );
 
       RealType getElement( const IndexType row,
                            const IndexType column ) const;
 
-      [[deprecated("")]] __cuda_callable__
-      void getRowFast( const IndexType row,
-                       IndexType* columns,
-                       RealType* values ) const;
-
       template< typename Vector >
       __cuda_callable__
       typename Vector::RealType rowVectorProduct( const IndexType row,
@@ -255,8 +204,6 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       IndexAllocator indexAllocator;
 
       RealAllocator realAllocator;
-
-
 };
 
 }  // namespace Conatiners
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 691157a9c..e43a4fbed 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -195,64 +195,6 @@ getCompressedRowLengths( Vector& rowLengths ) const
    this->allRowsReduction( fetch, reduce, keep, 0 );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename, typename > class Segments,
-          typename RealAllocator,
-          typename IndexAllocator >
-Index
-SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-getRowLength( const IndexType row ) const
-{
-
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename, typename > class Segments,
-          typename RealAllocator,
-          typename IndexAllocator >
-__cuda_callable__
-Index
-SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-getRowLengthFast( const IndexType row ) const
-{
-
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename, typename > class Segments,
-          typename RealAllocator,
-          typename IndexAllocator >
-Index
-SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-getNonZeroRowLength( const IndexType row ) const
-{
-
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename, typename > class Segments,
-          typename RealAllocator,
-          typename IndexAllocator >
-__cuda_callable__
-Index
-SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-getNonZeroRowLengthFast( const IndexType row ) const
-{
-
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -331,23 +273,6 @@ getRow( const IndexType& rowIdx ) -> RowView
    return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename, typename > class Segments,
-          typename RealAllocator,
-          typename IndexAllocator >
-__cuda_callable__
-bool
-SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-setElementFast( const IndexType row,
-                const IndexType column,
-                const RealType& value )
-{
-   return this->addElementFast( row, column, value, 0.0 );
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -364,24 +289,6 @@ setElement( const IndexType row,
    return this->addElement( row, column, value, 0.0 );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename, typename > class Segments,
-          typename RealAllocator,
-          typename IndexAllocator >
-__cuda_callable__
-bool
-SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-addElementFast( const IndexType row,
-                const IndexType column,
-                const RealType& value,
-                const RealType& thisElementMultiplicator )
-{
-
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -448,107 +355,6 @@ addElement( const IndexType row,
    }
 }
 
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename, typename > class Segments,
-          typename RealAllocator,
-          typename IndexAllocator >
-__cuda_callable__
-bool
-SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-setRowFast( const IndexType row,
-            const IndexType* columnIndexes,
-            const RealType* values,
-            const IndexType elements )
-{
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename, typename > class Segments,
-          typename RealAllocator,
-          typename IndexAllocator >
-bool
-SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-setRow( const IndexType row,
-        const IndexType* columnIndexes,
-        const RealType* values,
-        const IndexType elements )
-{
-   const IndexType rowLength = this->segments.getSegmentSize( row );
-   if( elements > rowLength )
-      return false;
-
-   for( IndexType i = 0; i < elements; i++ )
-   {
-      const IndexType globalIdx = this->segments.getGlobalIndex( row, i );
-      this->columnIndexes.setElement( globalIdx, columnIndexes[ i ] );
-      this->values.setElement( globalIdx, values[ i ] );
-   }
-   for( IndexType i = elements; i < rowLength; i++ )
-      this->columnIndexes.setElement( this->segments.getGlobalIndex( row, i ), this->getPaddingIndex() );
-   return true;
-}
-
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename, typename > class Segments,
-          typename RealAllocator,
-          typename IndexAllocator >
-__cuda_callable__
-bool
-SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-addRowFast( const IndexType row,
-            const IndexType* columns,
-            const RealType* values,
-            const IndexType numberOfElements,
-            const RealType& thisElementMultiplicator )
-{
-
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename, typename > class Segments,
-          typename RealAllocator,
-          typename IndexAllocator >
-bool
-SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-addRow( const IndexType row,
-        const IndexType* columns,
-        const RealType* values,
-        const IndexType numberOfElements,
-        const RealType& thisElementMultiplicator )
-{
-
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename, typename > class Segments,
-          typename RealAllocator,
-          typename IndexAllocator >
-__cuda_callable__
-Real
-SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-getElementFast( const IndexType row,
-                const IndexType column ) const
-{
-
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -573,23 +379,6 @@ getElement( const IndexType row,
    return 0.0;
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename, typename > class Segments,
-          typename RealAllocator,
-          typename IndexAllocator >
-__cuda_callable__
-void
-SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-getRowFast( const IndexType row,
-            IndexType* columns,
-            RealType* values ) const
-{
-
-}
-
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h
index 714692df8..29ea99f75 100644
--- a/src/TNL/Matrices/SparseMatrixView.h
+++ b/src/TNL/Matrices/SparseMatrixView.h
@@ -79,16 +79,9 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
       template< typename Vector >
       void getCompressedRowLengths( Vector& rowLengths ) const;
 
+      [[deprecated]]
       IndexType getRowLength( const IndexType row ) const;
 
-      __cuda_callable__
-      IndexType getRowLengthFast( const IndexType row ) const;
-
-      IndexType getNonZeroRowLength( const IndexType row ) const;
-
-      __cuda_callable__
-      IndexType getNonZeroRowLengthFast( const IndexType row ) const;
-
       IndexType getNumberOfNonzeroMatrixElements() const;
 
       void reset();
@@ -99,68 +92,18 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
       __cuda_callable__
       RowView getRow( const IndexType& rowIdx );
 
-      [[deprecated("")]] __cuda_callable__
-      bool setElementFast( const IndexType row,
-                           const IndexType column,
-                           const RealType& value );
-
       bool setElement( const IndexType row,
                        const IndexType column,
                        const RealType& value );
 
-      [[deprecated("")]] __cuda_callable__
-      bool addElementFast( const IndexType row,
-                           const IndexType column,
-                           const RealType& value,
-                           const RealType& thisElementMultiplicator = 1.0 );
-
-      [[deprecated("")]] 
       bool addElement( const IndexType row,
                        const IndexType column,
                        const RealType& value,
                        const RealType& thisElementMultiplicator = 1.0 );
 
-
-      [[deprecated("")]] __cuda_callable__
-      bool setRowFast( const IndexType row,
-                       const IndexType* columnIndexes,
-                       const RealType* values,
-                       const IndexType elements );
-
-      [[deprecated("")]] 
-      bool setRow( const IndexType row,
-                   const IndexType* columnIndexes,
-                   const RealType* values,
-                   const IndexType elements );
-
-
-      [[deprecated("")]] __cuda_callable__
-      bool addRowFast( const IndexType row,
-                       const IndexType* columns,
-                       const RealType* values,
-                       const IndexType numberOfElements,
-                       const RealType& thisElementMultiplicator = 1.0 );
-
-      [[deprecated("")]] 
-      bool addRow( const IndexType row,
-                   const IndexType* columns,
-                   const RealType* values,
-                   const IndexType numberOfElements,
-                   const RealType& thisElementMultiplicator = 1.0 );
-
-
-      [[deprecated("")]] __cuda_callable__
-      RealType getElementFast( const IndexType row,
-                               const IndexType column ) const;
-
       RealType getElement( const IndexType row,
                            const IndexType column ) const;
 
-      [[deprecated("")]] __cuda_callable__
-      void getRowFast( const IndexType row,
-                       IndexType* columns,
-                       RealType* values ) const;
-
       template< typename Vector >
       __cuda_callable__
       typename Vector::RealType rowVectorProduct( const IndexType row,
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index ce0e7aa18..408222373 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -142,44 +142,6 @@ getRowLength( const IndexType row ) const
 
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
-__cuda_callable__
-Index
-SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-getRowLengthFast( const IndexType row ) const
-{
-
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
-Index
-SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-getNonZeroRowLength( const IndexType row ) const
-{
-
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
-__cuda_callable__
-Index
-SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-getNonZeroRowLengthFast( const IndexType row ) const
-{
-
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -237,21 +199,6 @@ getRow( const IndexType& rowIdx ) -> RowView
    return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
-__cuda_callable__
-bool
-SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-setElementFast( const IndexType row,
-                const IndexType column,
-                const RealType& value )
-{
-   return this->addElementFast( row, column, value, 0.0 );
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -266,22 +213,6 @@ setElement( const IndexType row,
    return this->addElement( row, column, value, 0.0 );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
-__cuda_callable__
-bool
-SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-addElementFast( const IndexType row,
-                const IndexType column,
-                const RealType& value,
-                const RealType& thisElementMultiplicator )
-{
-
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -346,97 +277,6 @@ addElement( const IndexType row,
    }
 }
 
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
-__cuda_callable__
-bool
-SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-setRowFast( const IndexType row,
-            const IndexType* columnIndexes,
-            const RealType* values,
-            const IndexType elements )
-{
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
-bool
-SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-setRow( const IndexType row,
-        const IndexType* columnIndexes,
-        const RealType* values,
-        const IndexType elements )
-{
-   const IndexType rowLength = this->segments.getSegmentSize( row );
-   if( elements > rowLength )
-      return false;
-
-   for( IndexType i = 0; i < elements; i++ )
-   {
-      const IndexType globalIdx = this->segments.getGlobalIndex( row, i );
-      this->columnIndexes.setElement( globalIdx, columnIndexes[ i ] );
-      this->values.setElement( globalIdx, values[ i ] );
-   }
-   for( IndexType i = elements; i < rowLength; i++ )
-      this->columnIndexes.setElement( this->segments.getGlobalIndex( row, i ), this->getPaddingIndex() );
-   return true;
-}
-
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
-__cuda_callable__
-bool
-SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-addRowFast( const IndexType row,
-            const IndexType* columns,
-            const RealType* values,
-            const IndexType numberOfElements,
-            const RealType& thisElementMultiplicator )
-{
-
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
-bool
-SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-addRow( const IndexType row,
-        const IndexType* columns,
-        const RealType* values,
-        const IndexType numberOfElements,
-        const RealType& thisElementMultiplicator )
-{
-
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
-__cuda_callable__
-Real
-SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-getElementFast( const IndexType row,
-                const IndexType column ) const
-{
-
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -459,21 +299,6 @@ getElement( const IndexType row,
    return 0.0;
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
-__cuda_callable__
-void
-SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-getRowFast( const IndexType row,
-            IndexType* columns,
-            RealType* values ) const
-{
-
-}
-
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp
index c6ff5cbd7..df06d28fc 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp
@@ -817,78 +817,6 @@ void test_AddElement()
     EXPECT_EQ( m.getElement( 5, 4 ), 18 );
 }
 
-template< typename Matrix >
-void test_SetRow()
-{
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-
-/*
- * Sets up the following 3x7 sparse matrix:
- *
- *    /  0  0  0  1  1  1  0 \
- *    |  2  2  2  0  0  0  0 |
- *    \  3  3  3  0  0  0  0 /
- */
-
-    const IndexType rows = 3;
-    const IndexType cols = 7;
-
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
-    typename Matrix::CompressedRowLengthsVector rowLengths;
-    rowLengths.setSize( rows );
-    rowLengths.setValue( 6 );
-    rowLengths.setElement( 1, 3 );
-    m.setCompressedRowLengths( rowLengths );
-
-    RealType value = 1;
-    for( IndexType i = 0; i < 3; i++ )
-    {
-        m.setElement( 0, i + 3, value );
-        m.setElement( 1, i, value + 1 );
-        m.setElement( 2, i, value + 2 );
-    }
-
-    RealType row1 [ 3 ] = { 11, 11, 11 }; IndexType colIndexes1 [ 3 ] = { 0, 1, 2 };
-    RealType row2 [ 3 ] = { 22, 22, 22 }; IndexType colIndexes2 [ 3 ] = { 0, 1, 2 };
-    RealType row3 [ 3 ] = { 33, 33, 33 }; IndexType colIndexes3 [ 3 ] = { 3, 4, 5 };
-
-    RealType row = 0;
-    IndexType elements = 3;
-
-    m.setRow( row++, colIndexes1, row1, elements );
-    m.setRow( row++, colIndexes2, row2, elements );
-    m.setRow( row++, colIndexes3, row3, elements );
-
-
-    EXPECT_EQ( m.getElement( 0, 0 ), 11 );
-    EXPECT_EQ( m.getElement( 0, 1 ), 11 );
-    EXPECT_EQ( m.getElement( 0, 2 ), 11 );
-    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 6 ),  0 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ), 22 );
-    EXPECT_EQ( m.getElement( 1, 1 ), 22 );
-    EXPECT_EQ( m.getElement( 1, 2 ), 22 );
-    EXPECT_EQ( m.getElement( 1, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 6 ),  0 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ),  0 );
-    EXPECT_EQ( m.getElement( 2, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 2, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 33 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 33 );
-    EXPECT_EQ( m.getElement( 2, 5 ), 33 );
-    EXPECT_EQ( m.getElement( 2, 6 ),  0 );
-}
-
 template< typename Matrix >
 void test_VectorProduct()
 {
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
index e86e34f0a..34f7b4f70 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
@@ -116,13 +116,6 @@ TYPED_TEST( CSRMatrixTest, addElementTest )
     test_AddElement< CSRMatrixType >();
 }
 
-TYPED_TEST( CSRMatrixTest, setRowTest )
-{
-    using CSRMatrixType = typename TestFixture::CSRMatrixType;
-
-    test_SetRow< CSRMatrixType >();
-}
-
 TYPED_TEST( CSRMatrixTest, vectorProductTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
index f597e3199..48cf9afbf 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
@@ -126,13 +126,6 @@ TYPED_TEST( EllpackMatrixTest, addElementTest )
     test_AddElement< EllpackMatrixType >();
 }
 
-TYPED_TEST( EllpackMatrixTest, setRowTest )
-{
-    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
-
-    test_SetRow< EllpackMatrixType >();
-}
-
 TYPED_TEST( EllpackMatrixTest, vectorProductTest )
 {
     using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
index 172ed722a..8eba34a2b 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
@@ -127,13 +127,6 @@ TYPED_TEST( SlicedEllpackMatrixTest, addElementTest )
     test_AddElement< SlicedEllpackMatrixType >();
 }
 
-TYPED_TEST( SlicedEllpackMatrixTest, setRowTest )
-{
-    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-
-    test_SetRow< SlicedEllpackMatrixType >();
-}
-
 TYPED_TEST( SlicedEllpackMatrixTest, vectorProductTest )
 {
     using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-- 
GitLab


From 1276b9e51707185b7f523c02797e5228afa287d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 2 Jan 2020 17:19:16 +0100
Subject: [PATCH 051/179] Deleted getView in Matrix and MatrixView.

---
 src/TNL/Matrices/Matrix.h       |  4 ----
 src/TNL/Matrices/Matrix.hpp     | 22 ----------------------
 src/TNL/Matrices/MatrixView.h   |  6 ------
 src/TNL/Matrices/MatrixView.hpp | 22 ----------------------
 4 files changed, 54 deletions(-)

diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index 30031da42..8fc8cb5f2 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -47,10 +47,6 @@ public:
            const IndexType columns,
            const RealAllocatorType& allocator = RealAllocatorType() );
 
-   /*ViewType getView();
-
-   ConstViewType getConstView() const;*/
-
    virtual void setDimensions( const IndexType rows,
                                const IndexType columns );
 
diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp
index 3a09d0088..9fc5ea620 100644
--- a/src/TNL/Matrices/Matrix.hpp
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -43,28 +43,6 @@ Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType
 {
 }
 
-/*template< typename Real,
-          typename Device,
-          typename Index,
-          typename RealAllocator >
-auto
-Matrix< Real, Device, Index, RealAllocator >::
-getView() -> ViewType
-{
-   return ViewType( rows, columns, values.getView() );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename RealAllocator >
-auto
-Matrix< Real, Device, Index, RealAllocator >::
-getConstView() const -> ConstViewType
-{
-   return ConstViewType( rows, columns, values.getConstView() );
-}*/
-
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h
index 5a3cde478..76965e511 100644
--- a/src/TNL/Matrices/MatrixView.h
+++ b/src/TNL/Matrices/MatrixView.h
@@ -49,12 +49,6 @@ public:
    __cuda_callable__
    MatrixView( const MatrixView& view ) = default;
 
-   //__cuda_callable__
-   //ViewType getView();
-
-   //__cuda_callable__
-   //ConstViewType getConstView() const;
-
    virtual IndexType getRowLength( const IndexType row ) const = 0;
 
    // TODO: implementation is not parallel
diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp
index 0473f52b8..bd3d9beae 100644
--- a/src/TNL/Matrices/MatrixView.hpp
+++ b/src/TNL/Matrices/MatrixView.hpp
@@ -42,28 +42,6 @@ MatrixView( const IndexType rows_,
 {
 }
 
-/*template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-auto
-MatrixView< Real, Device, Index >::
-getView() ->ViewType
-{
-   return ViewType( rows, columns, values.getView() );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-auto
-MatrixView< Real, Device, Index >::
-getConstView() const -> ConstViewType
-{
-   return ConstViewType( rows, columns, values.getConstView() );
-}*/
-
 template< typename Real,
           typename Device,
           typename Index >
-- 
GitLab


From 1a519384e45caab6676075dabd57d8cb4233f080 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 2 Jan 2020 21:40:53 +0100
Subject: [PATCH 052/179] Rewritting assignement operator of sparse matrix to
 work with any matrix view.

---
 src/TNL/Containers/Segments/CSRView.h   |   9 +-
 src/TNL/Containers/Segments/CSRView.hpp |   9 --
 src/TNL/Matrices/SparseMatrix.h         |  28 ++++--
 src/TNL/Matrices/SparseMatrix.hpp       | 116 +++++++++++++++---------
 src/TNL/Matrices/SparseMatrixView.h     |   6 ++
 src/TNL/Matrices/SparseMatrixView.hpp   |  32 +++++++
 6 files changed, 133 insertions(+), 67 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h
index 860a35a0a..f8bcacd0f 100644
--- a/src/TNL/Containers/Segments/CSRView.h
+++ b/src/TNL/Containers/Segments/CSRView.h
@@ -27,8 +27,8 @@ class CSRView
 
       using DeviceType = Device;
       using IndexType = Index;
-      using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, IndexType >;
-      using ConstOffsetsView = typename Containers::Vector< IndexType, DeviceType, IndexType >::ConstViewType;
+      using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, typename std::remove_const< IndexType >::type >;
+      using ConstOffsetsView = typename Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type >::ConstViewType;
       using ViewType = CSRView;
       template< typename Device_, typename Index_ >
       using ViewTemplate = CSRView< Device_, Index_ >;
@@ -39,10 +39,9 @@ class CSRView
       CSRView();
 
       __cuda_callable__
-      CSRView( const OffsetsView&& offsets );
+      CSRView( const OffsetsView& offsets );
 
-      __cuda_callable__
-      CSRView( const ConstOffsetsView&& offsets );
+      CSRView( const OffsetsView&& offsets );
 
       __cuda_callable__
       CSRView( const CSRView& csr_view );
diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
index b4304ee32..b0bb35313 100644
--- a/src/TNL/Containers/Segments/CSRView.hpp
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -37,15 +37,6 @@ CSRView( const OffsetsView&& offsets_view )
 {
 }
 
-template< typename Device,
-          typename Index >
-__cuda_callable__
-CSRView< Device, Index >::
-CSRView( const ConstOffsetsView&& offsets_view )
-   : offsets( offsets_view )
-{
-}
-
 template< typename Device,
           typename Index >
 __cuda_callable__
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index a64c80934..0e3484c10 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -159,27 +159,35 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       template< typename Function >
       void forRows( IndexType first, IndexType last, Function& function ) const;
 
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
+
       template< typename Function >
       void forAllRows( Function& function ) const;
 
+      template< typename Function >
+      void forAllRows( Function& function );
+
       template< typename Vector1, typename Vector2 >
       bool performSORIteration( const Vector1& b,
                                 const IndexType row,
                                 Vector2& x,
                                 const RealType& omega = 1.0 ) const;
 
-      // copy assignment
+      /**
+       * \brief Assignment of exactly the same matrix type.
+       * @param matrix
+       * @return 
+       */
       SparseMatrix& operator=( const SparseMatrix& matrix );
 
-      // cross-device copy assignment
-      template< typename Real2,
-                typename Device2,
-                typename Index2,
-                typename MatrixType2,
-                template< typename, typename, typename > class Segments2,
-                typename RealAllocator2,
-                typename IndexAllocator2 >
-      SparseMatrix& operator=( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >& matrix );
+      /**
+       * \brief Assignment of any other matrix type.
+       * @param matrix
+       * @return 
+       */
+      template< typename RHSMatrix >
+      SparseMatrix& operator=( const RHSMatrix& matrix );
 
       void save( File& file ) const;
 
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index e43a4fbed..72184738b 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -11,8 +11,8 @@
 #pragma once
 
 #include <functional>
-#include <TNL/Matrices/SparseMatrix.h>
 #include <TNL/Algorithms/Reduction.h>
+#include <TNL/Matrices/SparseMatrix.h>
 
 namespace TNL {
 namespace Matrices {
@@ -488,7 +488,30 @@ forRows( IndexType first, IndexType last, Function& function ) const
    const auto values_view = this->values.getConstView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool {
-      function( rowIdx, localIdx, globalIdx );
+      function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ] );
+      return true;
+   };
+   this->segments.forSegments( first, last, f );
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+forRows( IndexType first, IndexType last, Function& function )
+{
+   auto columns_view = this->columnIndexes.getView();
+   auto values_view = this->values.getView();
+   const IndexType paddingIndex_ = this->getPaddingIndex();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool {
+      function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ] );
       return true;
    };
    this->segments.forSegments( first, last, f );
@@ -510,6 +533,21 @@ forAllRows( Function& function ) const
    this->forRows( 0, this->getRows(), function );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+forAllRows( Function& function )
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
 /*template< typename Real,
           template< typename, typename, typename > class Segments,
           typename Device,
@@ -585,56 +623,52 @@ template< typename Real,
           template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
-   template< typename Real2,
-             typename Device2,
-             typename Index2,
-             typename MatrixType2,
-             template< typename, typename, typename > class Segments2,
-             typename RealAllocator2,
-             typename IndexAllocator2 >
+   template< typename RHSMatrix >
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >&
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-operator=( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >& matrix )
+operator=( const RHSMatrix& matrix )
 {
-   using RHSMatrixType = SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >;
-   typename RHSMatrixType::RowsCapacitiesType rowLengths;
+   using RHSIndexType = typename RHSMatrix::IndexType;
+   using RHSRealType = typename RHSMatrix::RealType;
+   using RHSDeviceType = typename RHSMatrix::DeviceType;
+   using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType;
+   using RHSIndexAllocatorType = typename RHSMatrix::IndexAllocatorType;
+
+   typename RHSMatrix::RowsCapacitiesType rowLengths;
    matrix.getCompressedRowLengths( rowLengths );
    this->setDimensions( matrix.getRows(), matrix.getColumns() );
    this->setCompressedRowLengths( rowLengths );
 
-   // TODO: Replace this with SparseMatrixView
-   const auto matrix_columns_view = matrix.columnIndexes.getConstView();
-   const auto matrix_values_view = matrix.values.getConstView();
+   // TODO: use getConstView when it works
+   const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView();
    const IndexType paddingIndex = this->getPaddingIndex();
-   auto this_columns_view = this->columnIndexes.getView();
-   auto this_values_view = this->values.getView();
-   this_columns_view = paddingIndex;
+   auto columns_view = this->columnIndexes.getView();
+   auto values_view = this->values.getView();
+   columns_view = paddingIndex;
 
-   if( std::is_same< Device, Device2 >::value )
+   if( std::is_same< DeviceType, RHSDeviceType >::value )
    {
       const auto this_segments_view = this->segments.getView();
-      auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable {
-         const IndexType column = matrix_columns_view[ globalIdx ];
-         if( column != paddingIndex )
+      const auto segments_view = this->segments.getView();
+      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable {
+         if( columnIndex != paddingIndex )
          {
-            const RealType value = matrix_values_view[ globalIdx ];
-            IndexType thisGlobalIdx = this_segments_view.getGlobalIndex( rowIdx, localIdx );
-            this_columns_view[ thisGlobalIdx ] = column;
-            this_values_view[ thisGlobalIdx ] = value;
+            IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx );
+            columns_view[ thisGlobalIdx ] = columnIndex;
+            values_view[ thisGlobalIdx ] = value;
          }
       };
       matrix.forAllRows( f );
    }
    else
    {
-      //std::cerr << "Matrix = " << std::endl << matrix << std::endl;
       const IndexType maxRowLength = max( rowLengths );
-      const IndexType bufferRowsCount( 8 );
+      const IndexType bufferRowsCount( 128 );
       const size_t bufferSize = bufferRowsCount * maxRowLength;
-      Containers::Vector< Real2, Device2, Index2, RealAllocator2 > matrixValuesBuffer( bufferSize );
-      Containers::Vector< Index2, Device2, Index2, IndexAllocator2 > matrixColumnsBuffer( bufferSize );
-      Containers::Vector< RealType, DeviceType, IndexType, RealAllocator > thisValuesBuffer( bufferSize );
-      Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocator > thisColumnsBuffer( bufferSize );
+      Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize );
+      Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType, RHSIndexAllocatorType > matrixColumnsBuffer( bufferSize );
+      Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize );
+      Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize );
       auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
       auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView();
       auto thisValuesBuffer_view = thisValuesBuffer.getView();
@@ -650,20 +684,16 @@ operator=( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, R
 
          ////
          // Copy matrix elements into buffer
-         auto f1 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable {
-            const IndexType column = matrix_columns_view[ globalIdx ];
-            if( column != paddingIndex )
+         auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable {
+            if( columnIndex != paddingIndex )
             {
                const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
-               //printf( ">>>RowIdx = %d GlobalIdx = %d  column = %d bufferIdx = %d \n", rowIdx, globalIdx, column, bufferIdx );
-               matrixValuesBuffer_view[ bufferIdx ] = matrix_values_view[ globalIdx ];
-               matrixColumnsBuffer_view[ bufferIdx ] = column;
+               matrixColumnsBuffer_view[ bufferIdx ] = columnIndex;
+               matrixValuesBuffer_view[ bufferIdx ] = value;
             }
          };
          matrix.forRows( baseRow, lastRow, f1 );
 
-         //std::cerr << "Values = " << matrixValuesBuffer_view << std::endl;
-         //std::cerr << "Columns = " << matrixColumnsBuffer_view << std::endl;
          ////
          // Copy the source matrix buffer to this matrix buffer
          thisValuesBuffer_view = matrixValuesBuffer_view;
@@ -671,13 +701,13 @@ operator=( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, R
 
          ////
          // Copy matrix elements from the buffer to the matrix
-         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable {
+         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value  ) mutable {
             const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
             const IndexType column = thisColumnsBuffer_view[ bufferIdx ];
             if( column != paddingIndex )
             {
-               this_columns_view[ globalIdx ] = column;
-               this_values_view[ globalIdx ] = thisValuesBuffer_view[ bufferIdx ];
+               columnIndex = column;
+               value = thisValuesBuffer_view[ bufferIdx ];
             }
          };
          this->forRows( baseRow, lastRow, f2 );
diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h
index 29ea99f75..1f587acf3 100644
--- a/src/TNL/Matrices/SparseMatrixView.h
+++ b/src/TNL/Matrices/SparseMatrixView.h
@@ -128,9 +128,15 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
       template< typename Function >
       void forRows( IndexType first, IndexType last, Function& function ) const;
 
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
+
       template< typename Function >
       void forAllRows( Function& function ) const;
 
+      template< typename Function >
+      void forAllRows( Function& function );
+
       template< typename Vector1, typename Vector2 >
       bool performSORIteration( const Vector1& b,
                                 const IndexType row,
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index 408222373..df136388e 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -402,7 +402,26 @@ forRows( IndexType first, IndexType last, Function& function ) const
       return true;
    };
    this->segments.forSegments( first, last, f );
+}
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+   template< typename Function >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+forRows( IndexType first, IndexType last, Function& function )
+{
+   auto columns_view = this->columnIndexes.getView();
+   auto values_view = this->values.getView();
+   const IndexType paddingIndex_ = this->getPaddingIndex();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool {
+      function( rowIdx, localIdx, globalIdx );
+      return true;
+   };
+   this->segments.forSegments( first, last, f );
 }
 
 template< typename Real,
@@ -418,6 +437,19 @@ forAllRows( Function& function ) const
    this->forRows( 0, this->getRows(), function );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+   template< typename Function >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+forAllRows( Function& function )
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
 /*template< typename Real,
           template< typename, typename > class SegmentsView,
           typename Device,
-- 
GitLab


From 9895f08102c06c1ca46be39240a55258742349b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 3 Jan 2020 11:03:10 +0100
Subject: [PATCH 053/179] Fixed legacy sparse matrix formats unit tests.

---
 src/TNL/Matrices/{Dense_impl.h => Dense.hpp} |  0
 src/UnitTests/Matrices/Legacy/CMakeLists.txt | 14 +++++++-------
 2 files changed, 7 insertions(+), 7 deletions(-)
 rename src/TNL/Matrices/{Dense_impl.h => Dense.hpp} (100%)

diff --git a/src/TNL/Matrices/Dense_impl.h b/src/TNL/Matrices/Dense.hpp
similarity index 100%
rename from src/TNL/Matrices/Dense_impl.h
rename to src/TNL/Matrices/Dense.hpp
diff --git a/src/UnitTests/Matrices/Legacy/CMakeLists.txt b/src/UnitTests/Matrices/Legacy/CMakeLists.txt
index 9cdfe2784..4320b6c1f 100644
--- a/src/UnitTests/Matrices/Legacy/CMakeLists.txt
+++ b/src/UnitTests/Matrices/Legacy/CMakeLists.txt
@@ -59,14 +59,14 @@ ELSE(  BUILD_CUDA )
 ENDIF( BUILD_CUDA )
 
 
-ADD_TEST( Legacy_SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( Legacy_SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 # TODO: Uncomment the following when AdEllpack works
 #ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( Legacy_SparseMatrixTest_BiEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_BiEllpack${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( Legacy_SparseMatrixTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( Legacy_SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( Legacy_SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( Legacy_SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixTest_BiEllpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_BiEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
 # TODO: DenseMatrixTest is not finished
 #ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
-- 
GitLab


From bc185a64f9cdfd383156a7ed06ab19789c1dfab1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 3 Jan 2020 11:03:54 +0100
Subject: [PATCH 054/179] Added template parameters to dense matrix:
 RowMajorOrder and RealAllocator.

---
 src/TNL/Matrices/Dense.h           |  37 ++--
 src/TNL/Matrices/Dense.hpp         | 284 +++++++++++++++++++----------
 src/TNL/Matrices/DistributedSpMV.h |   3 +-
 src/TNL/Matrices/Matrix.h          |   4 +-
 src/TNL/Matrices/Matrix.hpp        |   7 +-
 src/TNL/Matrices/SparseMatrix.h    |  10 +-
 src/TNL/Matrices/SparseMatrix.hpp  |   4 +-
 7 files changed, 215 insertions(+), 134 deletions(-)

diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index c46992723..3fc6d8908 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -10,6 +10,7 @@
 
 #pragma once
 
+#include <TNL/Allocators/Default.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Matrices/Matrix.h>
 #include <TNL/Matrices/DenseRow.h>
@@ -23,7 +24,9 @@ class DenseDeviceDependentCode;
 
 template< typename Real = double,
           typename Device = Devices::Host,
-          typename Index = int >
+          typename Index = int,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
 class Dense : public Matrix< Real, Device, Index >
 {
 private:
@@ -32,17 +35,17 @@ private:
    using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
 
    // friend class will be needed for templated assignment operators
-   template< typename Real2, typename Device2, typename Index2 >
-   friend class Dense;
+   //template< typename Real2, typename Device2, typename Index2 >
+   //friend class Dense;
 
 public:
-   typedef Real RealType;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-   typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector;
-   typedef typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
-   typedef Matrix< Real, Device, Index > BaseType;
-   typedef DenseRow< Real, Index > MatrixRow;
+   using RealType = Real;
+   using DeviceType = Device;
+   using IndexType = Index;
+   using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector;
+   using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView;
+   using BaseType = Matrix< Real, Device, Index >;
+   using MatrixRow = DenseRow< Real, Index >;
 
    template< typename _Real = Real,
              typename _Device = Device,
@@ -58,23 +61,17 @@ public:
    void setDimensions( const IndexType rows,
                        const IndexType columns );
 
-   template< typename Real2, typename Device2, typename Index2 >
-   void setLike( const Dense< Real2, Device2, Index2 >& matrix );
+   template< typename Matrix >
+   void setLike( const Matrix& matrix );
 
    /****
     * This method is only for the compatibility with the sparse matrices.
     */
    void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
-   /****
-    * Returns maximal number of the nonzero matrix elements that can be stored
-    * in a given row.
-    */
+   [[deprecated]]
    IndexType getRowLength( const IndexType row ) const;
 
-   __cuda_callable__
-   IndexType getRowLengthFast( const IndexType row ) const;
-
    IndexType getMaxRowLength() const;
 
    IndexType getNumberOfMatrixElements() const;
@@ -220,4 +217,4 @@ protected:
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/Dense_impl.h>
+#include <TNL/Matrices/Dense.hpp>
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 246bd09ed..70e5018dd 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -19,15 +19,19 @@ namespace Matrices {
 
 template< typename Real,
           typename Device,
-          typename Index >
-Dense< Real, Device, Index >::Dense()
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::Dense()
 {
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-String Dense< Real, Device, Index >::getSerializationType()
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+String Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getSerializationType()
 {
    return String( "Matrices::Dense< " ) +
           getType< RealType >() + ", " +
@@ -37,16 +41,20 @@ String Dense< Real, Device, Index >::getSerializationType()
 
 template< typename Real,
           typename Device,
-          typename Index >
-String Dense< Real, Device, Index >::getSerializationTypeVirtual() const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+String Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getSerializationTypeVirtual() const
 {
    return this->getSerializationType();
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::setDimensions( const IndexType rows,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setDimensions( const IndexType rows,
                                                   const IndexType columns )
 {
    Matrix< Real, Device, Index >::setDimensions( rows, columns );
@@ -56,59 +64,71 @@ void Dense< Real, Device, Index >::setDimensions( const IndexType rows,
 
 template< typename Real,
           typename Device,
-          typename Index >
-   template< typename Real2,
-             typename Device2,
-             typename Index2 >
-void Dense< Real, Device, Index >::setLike( const Dense< Real2, Device2, Index2 >& matrix )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Matrix_ >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setLike( const Matrix_& matrix )
 {
-   this->setDimensions( matrix.getRows(), matrix.getColumns() );
+   Matrix< Real, Device, Index, RealAllocator >::setLike( matrix );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
 {
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Dense< Real, Device, Index >::getRowLength( const IndexType row ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getRowLength( const IndexType row ) const
 {
    return this->getColumns();
 }
 
-template< typename Real,
+/*template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-Index Dense< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
+Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getRowLengthFast( const IndexType row ) const
 {
    return this->getColumns();
-}
+}*/
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Dense< Real, Device, Index >::getMaxRowLength() const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getMaxRowLength() const
 {
    return this->getColumns();
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Dense< Real, Device, Index >::getNumberOfMatrixElements() const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getNumberOfMatrixElements() const
 {
    return this->getRows() * this->getColumns();
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Dense< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getNumberOfNonzeroMatrixElements() const
 {
    IndexType nonzeroElements( 0 );
    for( IndexType row = 0; row < this->getRows(); row++ )
@@ -120,8 +140,10 @@ Index Dense< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::reset()
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::reset()
 {
    Matrix< Real, Device, Index >::reset();
    this->values.reset();
@@ -129,8 +151,10 @@ void Dense< Real, Device, Index >::reset()
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::setValue( const Real& value )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setValue( const Real& value )
 {
    this->values.setValue( value );
 }
@@ -138,9 +162,11 @@ void Dense< Real, Device, Index >::setValue( const Real& value )
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-Real& Dense< Real, Device, Index >::operator()( const IndexType row,
+Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator()( const IndexType row,
                                                 const IndexType column )
 {
    TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
@@ -153,9 +179,11 @@ Real& Dense< Real, Device, Index >::operator()( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-const Real& Dense< Real, Device, Index >::operator()( const IndexType row,
+const Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator()( const IndexType row,
                                                       const IndexType column ) const
 {
    TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
@@ -169,9 +197,11 @@ const Real& Dense< Real, Device, Index >::operator()( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-bool Dense< Real, Device, Index >::setElementFast( const IndexType row,
+bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setElementFast( const IndexType row,
                                                             const IndexType column,
                                                             const RealType& value )
 {
@@ -186,8 +216,10 @@ bool Dense< Real, Device, Index >::setElementFast( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Dense< Real, Device, Index >::setElement( const IndexType row,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setElement( const IndexType row,
                                                const IndexType column,
                                                const RealType& value )
 {
@@ -198,9 +230,11 @@ bool Dense< Real, Device, Index >::setElement( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-bool Dense< Real, Device, Index >::addElementFast( const IndexType row,
+bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addElementFast( const IndexType row,
                                                    const IndexType column,
                                                    const RealType& value,
                                                    const RealType& thisElementMultiplicator )
@@ -221,8 +255,10 @@ bool Dense< Real, Device, Index >::addElementFast( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Dense< Real, Device, Index >::addElement( const IndexType row,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addElement( const IndexType row,
                                                         const IndexType column,
                                                         const RealType& value,
                                                         const RealType& thisElementMultiplicator )
@@ -240,9 +276,11 @@ bool Dense< Real, Device, Index >::addElement( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-bool Dense< Real, Device, Index >::setRowFast( const IndexType row,
+bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setRowFast( const IndexType row,
                                                         const IndexType* columns,
                                                         const RealType* values,
                                                         const IndexType elements )
@@ -257,8 +295,10 @@ bool Dense< Real, Device, Index >::setRowFast( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Dense< Real, Device, Index >::setRow( const IndexType row,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setRow( const IndexType row,
                                                     const IndexType* columns,
                                                     const RealType* values,
                                                     const IndexType elements )
@@ -273,9 +313,11 @@ bool Dense< Real, Device, Index >::setRow( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-bool Dense< Real, Device, Index >::addRowFast( const IndexType row,
+bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addRowFast( const IndexType row,
                                                         const IndexType* columns,
                                                         const RealType* values,
                                                         const IndexType elements,
@@ -292,8 +334,10 @@ bool Dense< Real, Device, Index >::addRowFast( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Dense< Real, Device, Index >::addRow( const IndexType row,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addRow( const IndexType row,
                                                     const IndexType* columns,
                                                     const RealType* values,
                                                     const IndexType elements,
@@ -311,9 +355,11 @@ bool Dense< Real, Device, Index >::addRow( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-const Real& Dense< Real, Device, Index >::getElementFast( const IndexType row,
+const Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementFast( const IndexType row,
                                                           const IndexType column ) const
 {
    TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
@@ -326,8 +372,10 @@ const Real& Dense< Real, Device, Index >::getElementFast( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
-Real Dense< Real, Device, Index >::getElement( const IndexType row,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Real Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElement( const IndexType row,
                                                         const IndexType column ) const
 {
    return this->values.getElement( this->getElementIndex( row, column ) );
@@ -335,9 +383,11 @@ Real Dense< Real, Device, Index >::getElement( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-void Dense< Real, Device, Index >::getRowFast( const IndexType row,
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getRowFast( const IndexType row,
                                                         IndexType* columns,
                                                         RealType* values ) const
 {
@@ -350,10 +400,12 @@ void Dense< Real, Device, Index >::getRowFast( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-typename Dense< Real, Device, Index >::MatrixRow
-Dense< Real, Device, Index >::
+typename Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getRow( const IndexType rowIndex )
 {
    if( std::is_same< Device, Devices::Host >::value )
@@ -368,10 +420,12 @@ getRow( const IndexType rowIndex )
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-const typename Dense< Real, Device, Index >::MatrixRow
-Dense< Real, Device, Index >::
+const typename Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getRow( const IndexType rowIndex ) const
 {
    if( std::is_same< Device, Devices::Host >::value )
@@ -386,10 +440,12 @@ getRow( const IndexType rowIndex ) const
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Vector >
 __cuda_callable__
-typename Vector::RealType Dense< Real, Device, Index >::rowVectorProduct( const IndexType row,
+typename Vector::RealType Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::rowVectorProduct( const IndexType row,
                                                                                    const Vector& vector ) const
 {
    RealType sum( 0.0 );
@@ -400,10 +456,12 @@ typename Vector::RealType Dense< Real, Device, Index >::rowVectorProduct( const
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename InVector,
              typename OutVector >
-void Dense< Real, Device, Index >::vectorProduct( const InVector& inVector,
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct( const InVector& inVector,
                                                            OutVector& outVector ) const
 {
    TNL_ASSERT( this->getColumns() == inVector.getSize(),
@@ -418,9 +476,11 @@ void Dense< Real, Device, Index >::vectorProduct( const InVector& inVector,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Matrix >
-void Dense< Real, Device, Index >::addMatrix( const Matrix& matrix,
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addMatrix( const Matrix& matrix,
                                               const RealType& matrixMultiplicator,
                                               const RealType& thisMatrixMultiplicator )
 {
@@ -440,6 +500,8 @@ void Dense< Real, Device, Index >::addMatrix( const Matrix& matrix,
 #ifdef HAVE_CUDA
 template< typename Real,
           typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
           typename Matrix1,
           typename Matrix2,
           int tileDim,
@@ -538,9 +600,11 @@ __global__ void DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index >* r
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Matrix1, typename Matrix2, int tileDim >
-void Dense< Real, Device, Index >::getMatrixProduct( const Matrix1& matrix1,
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getMatrixProduct( const Matrix1& matrix1,
                                                               const Matrix2& matrix2,
                                                               const RealType& matrix1Multiplicator,
                                                               const RealType& matrix2Multiplicator )
@@ -628,6 +692,8 @@ void Dense< Real, Device, Index >::getMatrixProduct( const Matrix1& matrix1,
 template< typename Real,
           typename Index,
           typename Matrix,
+          bool RowMajorOrder,
+          typename RealAllocator,
           int tileDim,
           int tileRowBlockSize >
 __global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix,
@@ -696,6 +762,8 @@ __global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Ind
 
 template< typename Real,
           typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
           typename Matrix,
           int tileDim,
           int tileRowBlockSize >
@@ -776,9 +844,11 @@ __global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Matrix, int tileDim >
-void Dense< Real, Device, Index >::getTransposition( const Matrix& matrix,
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getTransposition( const Matrix& matrix,
                                                               const RealType& matrixMultiplicator )
 {
    TNL_ASSERT( this->getColumns() == matrix.getRows() &&
@@ -867,9 +937,11 @@ void Dense< Real, Device, Index >::getTransposition( const Matrix& matrix,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Vector1, typename Vector2 >
-void Dense< Real, Device, Index >::performSORIteration( const Vector1& b,
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORIteration( const Vector1& b,
                                                         const IndexType row,
                                                         Vector2& x,
                                                         const RealType& omega ) const
@@ -889,9 +961,11 @@ void Dense< Real, Device, Index >::performSORIteration( const Vector1& b,
 // copy assignment
 template< typename Real,
           typename Device,
-          typename Index >
-Dense< Real, Device, Index >&
-Dense< Real, Device, Index >::operator=( const Dense& matrix )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Dense& matrix )
 {
    this->setLike( matrix );
    this->values = matrix.values;
@@ -901,10 +975,12 @@ Dense< Real, Device, Index >::operator=( const Dense& matrix )
 // cross-device copy assignment
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Real2, typename Device2, typename Index2, typename >
-Dense< Real, Device, Index >&
-Dense< Real, Device, Index >::operator=( const Dense< Real2, Device2, Index2 >& matrix )
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Dense< Real2, Device2, Index2 >& matrix )
 {
    static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
                   "unknown device" );
@@ -919,40 +995,50 @@ Dense< Real, Device, Index >::operator=( const Dense< Real2, Device2, Index2 >&
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::save( const String& fileName ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::save( const String& fileName ) const
 {
    Object::save( fileName );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::load( const String& fileName )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::load( const String& fileName )
 {
    Object::load( fileName );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::save( File& file ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::save( File& file ) const
 {
    Matrix< Real, Device, Index >::save( file );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::load( File& file )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::load( File& file )
 {
    Matrix< Real, Device, Index >::load( file );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::print( std::ostream& str ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::print( std::ostream& str ) const
 {
    for( IndexType row = 0; row < this->getRows(); row++ )
    {
@@ -965,9 +1051,11 @@ void Dense< Real, Device, Index >::print( std::ostream& str ) const
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-Index Dense< Real, Device, Index >::getElementIndex( const IndexType row,
+Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementIndex( const IndexType row,
                                                               const IndexType column ) const
 {
    TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value ||
@@ -988,9 +1076,11 @@ class DenseDeviceDependentCode< Devices::Host >
 
       template< typename Real,
                 typename Index,
+                bool RowMajorOrder,
+                typename RealAllocator,
                 typename InVector,
                 typename OutVector >
-      static void vectorProduct( const Dense< Real, Device, Index >& matrix,
+      static void vectorProduct( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix,
                                  const InVector& inVector,
                                  OutVector& outVector )
       {
@@ -1011,9 +1101,11 @@ class DenseDeviceDependentCode< Devices::Cuda >
 
       template< typename Real,
                 typename Index,
+                bool RowMajorOrder,
+                typename RealAllocator,
                 typename InVector,
                 typename OutVector >
-      static void vectorProduct( const Dense< Real, Device, Index >& matrix,
+      static void vectorProduct( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix,
                                  const InVector& inVector,
                                  OutVector& outVector )
       {
diff --git a/src/TNL/Matrices/DistributedSpMV.h b/src/TNL/Matrices/DistributedSpMV.h
index b2abd13c5..e5b2e9008 100644
--- a/src/TNL/Matrices/DistributedSpMV.h
+++ b/src/TNL/Matrices/DistributedSpMV.h
@@ -19,6 +19,7 @@
 #include <vector>
 #include <utility>  // std::pair
 #include <limits>   // std::numeric_limits
+#include <TNL/Allocators/Host.h>
 #include <TNL/Matrices/Dense.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/VectorView.h>
@@ -235,7 +236,7 @@ public:
 
 protected:
    // communication pattern
-   Matrices::Dense< IndexType, Devices::Host, int > commPatternStarts, commPatternEnds;
+   Matrices::Dense< IndexType, Devices::Host, int, true, Allocators::Host< IndexType > > commPatternStarts, commPatternEnds;
 
    // span of rows with only block-diagonal entries
    std::pair< IndexType, IndexType > localOnlySpan;
diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index 8fc8cb5f2..a9b458d7b 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -61,8 +61,8 @@ public:
 
    virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
 
-   template< typename Real2, typename Device2, typename Index2, typename RealAllocator2 >
-   void setLike( const Matrix< Real2, Device2, Index2, RealAllocator2 >& matrix );
+   template< typename Matrix_ >
+   void setLike( const Matrix_& matrix );
 
    IndexType getNumberOfMatrixElements() const;
 
diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp
index 9fc5ea620..29226cb00 100644
--- a/src/TNL/Matrices/Matrix.hpp
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -81,11 +81,8 @@ template< typename Real,
           typename Device,
           typename Index,
           typename RealAllocator >
-   template< typename Real2,
-             typename Device2,
-             typename Index2,
-             typename RealAllocator2 >
-void Matrix< Real, Device, Index, RealAllocator >::setLike( const Matrix< Real2, Device2, Index2, RealAllocator2 >& matrix )
+   template< typename Matrix_ >
+void Matrix< Real, Device, Index, RealAllocator >::setLike( const Matrix_& matrix )
 {
    setDimensions( matrix.getRows(), matrix.getColumns() );
 }
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 0e3484c10..8f96af169 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -94,14 +94,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       [[deprecated]]
       virtual IndexType getRowLength( const IndexType row ) const {};
 
-      template< typename Real_,
-                typename Device_,
-                typename Index_,
-                typename MatrixType_,
-                template< typename, typename, typename > class Segments_,
-                typename RealAllocator_,
-                typename IndexAllocator_ >
-      void setLike( const SparseMatrix< Real_, Device_, Index_, MatrixType_, Segments_, RealAllocator_, IndexAllocator_ >& matrix );
+      template< typename Matrix >
+      void setLike( const Matrix& matrix );
 
       IndexType getNumberOfNonzeroMatrixElements() const;
 
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 72184738b..6c0655ce0 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -202,10 +202,10 @@ template< typename Real,
           template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
-   template< typename Real2, typename Device2, typename Index2, typename MatrixType2, template< typename, typename, typename > class Segments2, typename RealAllocator2, typename IndexAllocator2 >
+   template< typename Matrix_ >
 void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-setLike( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >& matrix )
+setLike( const Matrix_& matrix )
 {
    Matrix< Real, Device, Index, RealAllocator >::setLike( matrix );
 }
-- 
GitLab


From 25d43763f10fe3f6f19851daa152f035e2a08439 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 3 Jan 2020 11:07:16 +0100
Subject: [PATCH 055/179] Reimplementation of
 Matrices::Dense::getNumberOfNonzeroMatrixElements.

---
 src/TNL/Matrices/Dense.hpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 70e5018dd..7e6dec9ce 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -130,12 +130,11 @@ template< typename Real,
           typename RealAllocator >
 Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getNumberOfNonzeroMatrixElements() const
 {
-   IndexType nonzeroElements( 0 );
-   for( IndexType row = 0; row < this->getRows(); row++ )
-      for( IndexType column = 0; column < this->getColumns(); column++ )
-         if( this->getElement( row, column ) != 0 )
-            nonzeroElements++;
-   return nonzeroElements;
+   const auto values_view = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+      return ( values_view[ i ] != 0.0 );
+   };
+   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );
 }
 
 template< typename Real,
-- 
GitLab


From 8758a8e6a550ee9c24dcb2d8258aac6d0adcea5c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 3 Jan 2020 11:19:12 +0100
Subject: [PATCH 056/179] Deleting of deprecated methods in dense matrix.

---
 src/TNL/Matrices/Dense.h                 |  48 -------
 src/TNL/Matrices/Dense.hpp               | 173 +----------------------
 src/TNL/Matrices/DistributedSpMV.h       |   4 +-
 src/UnitTests/Matrices/DenseMatrixTest.h |  10 +-
 4 files changed, 14 insertions(+), 221 deletions(-)

diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index 3fc6d8908..a2c6a7eda 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -90,66 +90,18 @@ public:
    const Real& operator()( const IndexType row,
                            const IndexType column ) const;
 
-   __cuda_callable__
-   bool setElementFast( const IndexType row,
-                        const IndexType column,
-                        const RealType& value );
-
    bool setElement( const IndexType row,
                     const IndexType column,
                     const RealType& value );
 
-   __cuda_callable__
-   bool addElementFast( const IndexType row,
-                        const IndexType column,
-                        const RealType& value,
-                        const RealType& thisElementMultiplicator = 1.0 );
-
    bool addElement( const IndexType row,
                     const IndexType column,
                     const RealType& value,
                     const RealType& thisElementMultiplicator = 1.0 );
 
-   __cuda_callable__
-   bool setRowFast( const IndexType row,
-                    const IndexType* columns,
-                    const RealType* values,
-                    const IndexType elements );
-
-   bool setRow( const IndexType row,
-                const IndexType* columns,
-                const RealType* values,
-                const IndexType elements );
-
-   __cuda_callable__
-   bool addRowFast( const IndexType row,
-                    const IndexType* columns,
-                    const RealType* values,
-                    const IndexType elements,
-                    const RealType& thisRowMultiplicator = 1.0 );
-
-   bool addRow( const IndexType row,
-                const IndexType* columns,
-                const RealType* values,
-                const IndexType elements,
-                const RealType& thisRowMultiplicator = 1.0 );
-
-   __cuda_callable__
-   const Real& getElementFast( const IndexType row,
-                               const IndexType column ) const;
-
    Real getElement( const IndexType row,
                     const IndexType column ) const;
 
-   __cuda_callable__
-   void getRowFast( const IndexType row,
-                    IndexType* columns,
-                    RealType* values ) const;
-
-   /*void getRow( const IndexType row,
-                IndexType* columns,
-                RealType* values ) const;*/
-
    __cuda_callable__
    MatrixRow getRow( const IndexType rowIndex );
 
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 7e6dec9ce..190052390 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -145,7 +145,6 @@ template< typename Real,
 void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::reset()
 {
    Matrix< Real, Device, Index >::reset();
-   this->values.reset();
 }
 
 template< typename Real,
@@ -155,10 +154,9 @@ template< typename Real,
           typename RealAllocator >
 void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setValue( const Real& value )
 {
-   this->values.setValue( value );
+   this->values = value;
 }
 
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -193,26 +191,6 @@ const Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator
    return this->values.operator[]( this->getElementIndex( row, column ) );
 }
 
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
-__cuda_callable__
-bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setElementFast( const IndexType row,
-                                                            const IndexType column,
-                                                            const RealType& value )
-{
-   TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
-   TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." );
-   TNL_ASSERT_GE( column, 0, "Column index must be non-negative." );
-   TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." );
-
-   this->values.operator[]( this->getElementIndex( row, column ) ) = value;
-   return true;
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -226,32 +204,6 @@ bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setElement( con
    return true;
 }
 
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
-__cuda_callable__
-bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addElementFast( const IndexType row,
-                                                   const IndexType column,
-                                                   const RealType& value,
-                                                   const RealType& thisElementMultiplicator )
-{
-   TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
-   TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." );
-   TNL_ASSERT_GE( column, 0, "Column index must be non-negative." );
-   TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." );
-
-   const IndexType elementIndex = this->getElementIndex( row, column );
-   if( thisElementMultiplicator == 1.0 )
-      this->values.operator[]( elementIndex ) += value;
-   else
-      this->values.operator[]( elementIndex ) =
-         thisElementMultiplicator * this->values.operator[]( elementIndex ) + value;
-   return true;
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -272,103 +224,6 @@ bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addElement( con
    return true;
 }
 
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
-__cuda_callable__
-bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setRowFast( const IndexType row,
-                                                        const IndexType* columns,
-                                                        const RealType* values,
-                                                        const IndexType elements )
-{
-   TNL_ASSERT( elements <= this->getColumns(),
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->getColumns() );
-   for( IndexType i = 0; i < elements; i++ )
-      this->setElementFast( row, columns[ i ], values[ i ] );
-   return true;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
-bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setRow( const IndexType row,
-                                                    const IndexType* columns,
-                                                    const RealType* values,
-                                                    const IndexType elements )
-{
-   TNL_ASSERT( elements <= this->getColumns(),
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->getColumns() );
-   for( IndexType i = 0; i < elements; i++ )
-      this->setElement( row, columns[ i ], values[ i ] );
-   return true;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
-__cuda_callable__
-bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addRowFast( const IndexType row,
-                                                        const IndexType* columns,
-                                                        const RealType* values,
-                                                        const IndexType elements,
-                                                        const RealType& thisRowMultiplicator )
-{
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   for( IndexType i = 0; i < elements; i++ )
-      this->setElementFast( row, columns[ i ],
-                            thisRowMultiplicator * this->getElementFast( row, columns[ i ] ) + values[ i ] );
-   return true;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
-bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addRow( const IndexType row,
-                                                    const IndexType* columns,
-                                                    const RealType* values,
-                                                    const IndexType elements,
-                                                    const RealType& thisRowMultiplicator )
-{
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   for( IndexType i = 0; i < elements; i++ )
-      this->setElement( row, columns[ i ],
-                        thisRowMultiplicator * this->getElement( row, columns[ i ] ) + values[ i ] );
-   return true;
-}
-
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
-__cuda_callable__
-const Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementFast( const IndexType row,
-                                                          const IndexType column ) const
-{
-   TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
-   TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." );
-   TNL_ASSERT_GE( column, 0, "Column index must be non-negative." );
-   TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." );
-
-   return this->values.operator[]( this->getElementIndex( row, column ) );
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -380,23 +235,6 @@ Real Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElement( con
    return this->values.getElement( this->getElementIndex( row, column ) );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
-__cuda_callable__
-void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getRowFast( const IndexType row,
-                                                        IndexType* columns,
-                                                        RealType* values ) const
-{
-   for( IndexType i = 0; i < this->getColumns(); i++ )
-   {
-      columns[ i ] = i;
-      values[ i ] = this->getElementFast( row, i );
-   }
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -448,8 +286,9 @@ typename Vector::RealType Dense< Real, Device, Index, RowMajorOrder, RealAllocat
                                                                                    const Vector& vector ) const
 {
    RealType sum( 0.0 );
-   for( IndexType column = 0; column < this->getColumns(); column++ )
-      sum += this->getElementFast( row, column ) * vector[ column ];
+   // TODO: Fix this
+   //for( IndexType column = 0; column < this->getColumns(); column++ )
+   //   sum += this->getElementFast( row, column ) * vector[ column ];
    return sum;
 }
 
@@ -949,9 +788,9 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORItera
    for( IndexType i = 0; i < this->getColumns(); i++ )
    {
       if( i == row )
-         diagonalValue = this->getElementFast( row, row );
+         diagonalValue = this->getElement( row, row );
       else
-         sum += this->getElementFast( row, i ) * x[ i ];
+         sum += this->getElement( row, i ) * x[ i ];
    }
    x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum );
 }
diff --git a/src/TNL/Matrices/DistributedSpMV.h b/src/TNL/Matrices/DistributedSpMV.h
index e5b2e9008..8460ded4d 100644
--- a/src/TNL/Matrices/DistributedSpMV.h
+++ b/src/TNL/Matrices/DistributedSpMV.h
@@ -125,8 +125,8 @@ public:
       preCommPatternEnds.setLike( commPatternEnds );
       for( int j = 0; j < nproc; j++ )
       for( int i = 0; i < nproc; i++ ) {
-         preCommPatternStarts.setElementFast( j, i, span_starts.getElement( i ) );
-         preCommPatternEnds.setElementFast( j, i, span_ends.getElement( i ) );
+         preCommPatternStarts.setElement( j, i, span_starts.getElement( i ) );
+         preCommPatternEnds.setElement( j, i, span_ends.getElement( i ) );
       }
 
       // assemble the commPattern* matrices
diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index 8d9e9c727..fc6ea6bd2 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -537,9 +537,10 @@ void test_SetRow()
     IndexType row = 0;
     IndexType elements = 5;
     
-    m.setRow( row++, colIndexes1, row1, elements );
+    // TODO: Fix this
+    /*m.setRow( row++, colIndexes1, row1, elements );
     m.setRow( row++, colIndexes2, row2, elements );
-    m.setRow( row++, colIndexes3, row3, elements );
+    m.setRow( row++, colIndexes3, row3, elements );*/
     
     EXPECT_EQ( m.getElement( 0, 0 ), 11 );
     EXPECT_EQ( m.getElement( 0, 1 ), 11 );
@@ -654,12 +655,13 @@ void test_AddRow()
     IndexType elements = 5;
     RealType thisRowMultiplicator = 0;
     
-    m.addRow( row++, colIndexes0, row0, elements, thisRowMultiplicator++ );
+    // TODO: Fix this
+    /*m.addRow( row++, colIndexes0, row0, elements, thisRowMultiplicator++ );
     m.addRow( row++, colIndexes1, row1, elements, thisRowMultiplicator++ );
     m.addRow( row++, colIndexes2, row2, elements, thisRowMultiplicator++ );
     m.addRow( row++, colIndexes3, row3, elements, thisRowMultiplicator++ );
     m.addRow( row++, colIndexes4, row4, elements, thisRowMultiplicator++ );
-    m.addRow( row++, colIndexes5, row5, elements, thisRowMultiplicator++ );
+    m.addRow( row++, colIndexes5, row5, elements, thisRowMultiplicator++ );*/
     
     EXPECT_EQ( m.getElement( 0, 0 ),  11 );
     EXPECT_EQ( m.getElement( 0, 1 ),  11 );
-- 
GitLab


From 7218a64dcb5d8a2d6fb616afc2bc66b6ca4bf1a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 3 Jan 2020 11:47:01 +0100
Subject: [PATCH 057/179] Replacing CSRSegmentView and EllpackSegment view with
 one general but specialized SegmentView.

---
 src/TNL/Containers/Segments/CSR.h             |  4 +-
 src/TNL/Containers/Segments/CSR.hpp           |  2 +-
 src/TNL/Containers/Segments/CSRSegmentView.h  | 47 -------------------
 src/TNL/Containers/Segments/CSRView.h         |  4 +-
 src/TNL/Containers/Segments/CSRView.hpp       |  2 +-
 src/TNL/Containers/Segments/Ellpack.h         |  4 +-
 src/TNL/Containers/Segments/Ellpack.hpp       |  4 +-
 src/TNL/Containers/Segments/EllpackView.h     |  4 +-
 .../{EllpackSegmentView.h => SegmentView.h}   | 46 ++++++++++++++++--
 src/TNL/Containers/Segments/SlicedEllpack.h   |  4 +-
 src/TNL/Containers/Segments/SlicedEllpack.hpp |  4 +-
 .../Containers/Segments/SlicedEllpackView.h   |  4 +-
 12 files changed, 59 insertions(+), 70 deletions(-)
 delete mode 100644 src/TNL/Containers/Segments/CSRSegmentView.h
 rename src/TNL/Containers/Segments/{EllpackSegmentView.h => SegmentView.h} (51%)

diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h
index df7cb5686..3645e9f6a 100644
--- a/src/TNL/Containers/Segments/CSR.h
+++ b/src/TNL/Containers/Segments/CSR.h
@@ -14,7 +14,7 @@
 
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/Segments/CSRView.h>
-#include <TNL/Containers/Segments/CSRSegmentView.h>
+#include <TNL/Containers/Segments/SegmentView.h>
 
 namespace TNL {
    namespace Containers {
@@ -35,7 +35,7 @@ class CSR
       using ViewTemplate = CSRView< Device_, Index_ >;
       using ViewType = CSRView< Device, Index >;
       using ConstViewType = CSRView< Device, std::add_const_t< Index > >;
-      using SegmentViewType = CSRSegmentView< IndexType >;
+      using SegmentViewType = SegmentView< IndexType, true >;
 
       CSR();
 
diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index 83da548fc..8b8ddfff5 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -176,7 +176,7 @@ auto
 CSR< Device, Index, IndexAllocator >::
 getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 {
-   return SegmentView( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] );
+   return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] );
 }
 
 template< typename Device,
diff --git a/src/TNL/Containers/Segments/CSRSegmentView.h b/src/TNL/Containers/Segments/CSRSegmentView.h
deleted file mode 100644
index 3ab5ef9d2..000000000
--- a/src/TNL/Containers/Segments/CSRSegmentView.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/***************************************************************************
-                          CSRSegmentView.h -  description
-                             -------------------
-    begin                : Dec 28, 2019
-    copyright            : (C) 2019 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-namespace TNL {
-   namespace Containers {
-      namespace Segments {
-
-template< typename Index >
-class CSRSegmentView
-{
-   public:
-
-      using IndexType = Index;
-
-      __cuda_callable__
-      CSRSegmentView( const IndexType offset, const IndexType size )
-      : segmentOffset( offset ), segmentSize( size ){};
-
-      __cuda_callable__
-      IndexType getSize() const
-      {
-         return this->segmentSize;
-      };
-
-      __cuda_callable__
-      IndexType getGlobalIndex( const IndexType localIndex ) const
-      {
-         TNL_ASSERT_LT( localIndex, segmentSize, "Local index exceeds segment bounds." );
-         return segmentOffset + localIndex;
-      };
-
-      protected:
-
-         IndexType segmentOffset, segmentSize;
-};
-      } //namespace Segments
-   } //namespace Containers
-} //namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h
index f8bcacd0f..a0f5cd200 100644
--- a/src/TNL/Containers/Segments/CSRView.h
+++ b/src/TNL/Containers/Segments/CSRView.h
@@ -13,7 +13,7 @@
 #include <type_traits>
 
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Segments/CSRSegmentView.h>
+#include <TNL/Containers/Segments/SegmentView.h>
 
 namespace TNL {
    namespace Containers {
@@ -33,7 +33,7 @@ class CSRView
       template< typename Device_, typename Index_ >
       using ViewTemplate = CSRView< Device_, Index_ >;
       using ConstViewType = CSRView< Device, std::add_const_t< Index > >;
-      using SegmentViewType = CSRSegmentView< IndexType >;
+      using SegmentViewType = SegmentView< IndexType >;
 
       __cuda_callable__
       CSRView();
diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
index b0bb35313..bbed8e3cb 100644
--- a/src/TNL/Containers/Segments/CSRView.hpp
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -156,7 +156,7 @@ auto
 CSRView< Device, Index >::
 getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 {
-   return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] );
+   return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ], 1 );
 }
 
 template< typename Device,
diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h
index f73155335..429615647 100644
--- a/src/TNL/Containers/Segments/Ellpack.h
+++ b/src/TNL/Containers/Segments/Ellpack.h
@@ -12,7 +12,7 @@
 
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/Segments/EllpackView.h>
-#include <TNL/Containers/Segments/EllpackSegmentView.h>
+#include <TNL/Containers/Segments/SegmentView.h>
 
 namespace TNL {
    namespace Containers {
@@ -37,7 +37,7 @@ class Ellpack
       using ViewTemplate = EllpackView< Device_, Index_ >;
       using ViewType = EllpackView< Device, Index, RowMajorOrder, Alignment >;
       //using ConstViewType = EllpackView< Device, std::add_const_t< Index >, RowMajorOrder, Alignment >;
-      using SegmentViewType = EllpackSegmentView< IndexType >;
+      using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
 
 
       Ellpack();
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index ebc2b360e..97a256c9e 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -239,9 +239,9 @@ Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 {
    if( RowMajorOrder )
-      return SegmentView( segmentIdx * this->segmentSize, this->segmentSize, 1 );
+      return SegmentViewType( segmentIdx * this->segmentSize, this->segmentSize, 1 );
    else
-      return SegmentView( segmentIdx, this->segmentSize, this->alignedSize );
+      return SegmentViewType( segmentIdx, this->segmentSize, this->alignedSize );
 }
 
 template< typename Device,
diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h
index 682eeeb4a..737810498 100644
--- a/src/TNL/Containers/Segments/EllpackView.h
+++ b/src/TNL/Containers/Segments/EllpackView.h
@@ -13,7 +13,7 @@
 #include <type_traits>
 
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Segments/EllpackSegmentView.h>
+#include <TNL/Containers/Segments/SegmentView.h>
 
 
 namespace TNL {
@@ -38,7 +38,7 @@ class EllpackView
       using ViewTemplate = EllpackView< Device_, Index_ >;
       using ViewType = EllpackView;
       //using ConstViewType = EllpackView< Device, std::add_const_t< Index > >;
-      using SegmentViewType = EllpackSegmentView< IndexType >;
+      using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
 
       __cuda_callable__
       EllpackView();
diff --git a/src/TNL/Containers/Segments/EllpackSegmentView.h b/src/TNL/Containers/Segments/SegmentView.h
similarity index 51%
rename from src/TNL/Containers/Segments/EllpackSegmentView.h
rename to src/TNL/Containers/Segments/SegmentView.h
index 7a1638e3f..29f2e7781 100644
--- a/src/TNL/Containers/Segments/EllpackSegmentView.h
+++ b/src/TNL/Containers/Segments/SegmentView.h
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          EllpackSegmentView.h -  description
+                          SegmentView.h -  description
                              -------------------
     begin                : Dec 28, 2019
     copyright            : (C) 2019 by Tomas Oberhuber
@@ -14,17 +14,21 @@ namespace TNL {
    namespace Containers {
       namespace Segments {
 
+template< typename Index,
+          bool RowMajorOrder = false >
+class SegmentView;
+
 template< typename Index >
-class EllpackSegmentView
+class SegmentView< Index, false >
 {
    public:
 
       using IndexType = Index;
 
       __cuda_callable__
-      EllpackSegmentView( const IndexType offset,
-                          const IndexType size,
-                          const IndexType step )
+      SegmentView( const IndexType offset,
+                   const IndexType size,
+                   const IndexType step )
       : segmentOffset( offset ), segmentSize( size ), step( step ){};
 
       __cuda_callable__
@@ -44,6 +48,38 @@ class EllpackSegmentView
          
          IndexType segmentOffset, segmentSize, step;
 };
+
+template< typename Index >
+class SegmentView< Index, true >
+{
+   public:
+
+      using IndexType = Index;
+
+      __cuda_callable__
+      SegmentView( const IndexType offset,
+                   const IndexType size,
+                   const IndexType step = 1 ) // For compatibility with previous specialization
+      : segmentOffset( offset ), segmentSize( size ){};
+
+      __cuda_callable__
+      IndexType getSize() const
+      {
+         return this->segmentSize;
+      };
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const IndexType localIndex ) const
+      {
+         TNL_ASSERT_LT( localIndex, segmentSize, "Local index exceeds segment bounds." );
+         return segmentOffset + localIndex;
+      };
+
+      protected:
+         
+         IndexType segmentOffset, segmentSize;
+};
+
       } //namespace Segments
    } //namespace Containers
 } //namespace TNL
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h
index 76185bcac..5953cde36 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.h
+++ b/src/TNL/Containers/Segments/SlicedEllpack.h
@@ -13,7 +13,7 @@
 #include <TNL/Allocators/Default.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/Segments/SlicedEllpackView.h>
-#include <TNL/Containers/Segments/EllpackSegmentView.h>
+#include <TNL/Containers/Segments/SegmentView.h>
 
 namespace TNL {
    namespace Containers {
@@ -37,7 +37,7 @@ class SlicedEllpack
       template< typename Device_, typename Index_ >
       using ViewTemplate = SlicedEllpackView< Device_, Index_ >;
       using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >;
-      using SegmentViewType = EllpackSegmentView< IndexType >;
+      using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
 
       SlicedEllpack();
 
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index b58b6a954..76790f393 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -269,9 +269,9 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
    const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ];
 
    if( RowMajorOrder )
-      return SegmentView( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 );
+      return SegmentViewType( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 );
    else
-      return SegmentView( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize );
+      return SegmentViewType( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize );
 }
 
 template< typename Device,
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h
index e87c75229..86745e7c0 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.h
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.h
@@ -13,7 +13,7 @@
 #include <type_traits>
 
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Segments/EllpackSegmentView.h>
+#include <TNL/Containers/Segments/SegmentView.h>
 
 namespace TNL {
    namespace Containers {
@@ -36,7 +36,7 @@ class SlicedEllpackView
       using ViewTemplate = SlicedEllpackView< Device_, Index_ >;
       using ViewType = SlicedEllpackView;
       using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index > >;
-      using SegmentViewType = EllpackSegmentView< IndexType >;
+      using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
 
       __cuda_callable__
       SlicedEllpackView();
-- 
GitLab


From 922a92e213835aaba2581274ecd0f89c3d097312 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 3 Jan 2020 11:51:14 +0100
Subject: [PATCH 058/179] Deleted useless file Containers/Segments.h.

---
 src/TNL/Containers/Segments.h | 29 -----------------------------
 1 file changed, 29 deletions(-)
 delete mode 100644 src/TNL/Containers/Segments.h

diff --git a/src/TNL/Containers/Segments.h b/src/TNL/Containers/Segments.h
deleted file mode 100644
index 99ea22357..000000000
--- a/src/TNL/Containers/Segments.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/***************************************************************************
-                          Segments.h -  description
-                             -------------------
-    begin                : Nov 29, 2019
-    copyright            : (C) 2019 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-namespace TNL {
-namespace Containers {
-
-template< typename Value,
-          typename Organization >
-class Segments
-{
-   public:
-
-      using ValueType = Value;
-      using OrganizationType = Organization;
-      using IndexType = typename Organization::IndexType;
-
-};
-
-}  // namespace Conatiners
-} // namespace TNL
\ No newline at end of file
-- 
GitLab


From 0265b5936104b53823def31271ced3facd892116 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 3 Jan 2020 12:09:56 +0100
Subject: [PATCH 059/179] Added segments to dense matrix.

---
 src/TNL/Matrices/Dense.h   |  7 +++++--
 src/TNL/Matrices/Dense.hpp | 42 ++++++++++++++++++++------------------
 2 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index a2c6a7eda..cff1d57b4 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -14,10 +14,10 @@
 #include <TNL/Devices/Host.h>
 #include <TNL/Matrices/Matrix.h>
 #include <TNL/Matrices/DenseRow.h>
-#include <TNL/Containers/Array.h>
+#include <TNL/Containers/Segments/Ellpack.h>
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 template< typename Device >
 class DenseDeviceDependentCode;
@@ -46,6 +46,7 @@ public:
    using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView;
    using BaseType = Matrix< Real, Device, Index >;
    using MatrixRow = DenseRow< Real, Index >;
+   using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder >;
 
    template< typename _Real = Real,
              typename _Device = Device,
@@ -164,6 +165,8 @@ protected:
 
    typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode;
    friend class DenseDeviceDependentCode< DeviceType >;
+
+   SegmentsType segments;
 };
 
 } // namespace Matrices
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 190052390..bed7a37b7 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -31,7 +31,9 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-String Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getSerializationType()
+String
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getSerializationType()
 {
    return String( "Matrices::Dense< " ) +
           getType< RealType >() + ", " +
@@ -44,7 +46,9 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-String Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getSerializationTypeVirtual() const
+String
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getSerializationTypeVirtual() const
 {
    return this->getSerializationType();
 }
@@ -54,12 +58,15 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setDimensions( const IndexType rows,
-                                                  const IndexType columns )
+void 
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setDimensions( const IndexType rows,
+               const IndexType columns )
 {
    Matrix< Real, Device, Index >::setDimensions( rows, columns );
+   this->segments.setSegmentsSizes( rows, columns );
    this->values.setSize( rows * columns );
-   this->values.setValue( 0.0 );
+   this->values = 0.0;
 }
 
 template< typename Real,
@@ -68,7 +75,9 @@ template< typename Real,
           bool RowMajorOrder,
           typename RealAllocator >
    template< typename Matrix_ >
-void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setLike( const Matrix_& matrix )
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setLike( const Matrix_& matrix )
 {
    Matrix< Real, Device, Index, RealAllocator >::setLike( matrix );
 }
@@ -78,8 +87,11 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
+void 
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
 {
+   this->setDimensions( rowLengths.getSize(), max( rowLengths ) );
 }
 
 template< typename Real,
@@ -92,17 +104,6 @@ Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getRowLength(
    return this->getColumns();
 }
 
-/*template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
-__cuda_callable__
-Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getRowLengthFast( const IndexType row ) const
-{
-   return this->getColumns();
-}*/
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -896,13 +897,14 @@ __cuda_callable__
 Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementIndex( const IndexType row,
                                                               const IndexType column ) const
 {
-   TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value ||
+   return this->segments.getGlobalIndex( row, column );
+   /*TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value ||
           std::is_same< Device, Devices::Cuda >::value ), )
    if( std::is_same< Device, Devices::Host >::value )
       return row * this->columns + column;
    if( std::is_same< Device, Devices::Cuda >::value )
       return column * this->rows + row;
-   return -1;
+   return -1;*/
 }
 
 template<>
-- 
GitLab


From 616b29d73c4129a215fbb316a4fe2f3f193ddf98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 3 Jan 2020 13:47:47 +0100
Subject: [PATCH 060/179] Added DenseMatrixRowView.

---
 src/TNL/Containers/Segments/Ellpack.h   |  2 +-
 src/TNL/Matrices/Dense.h                | 24 ++++++--
 src/TNL/Matrices/Dense.hpp              | 73 +++++++++----------------
 src/TNL/Matrices/DenseMatrixRowView.h   | 52 ++++++++++++++++++
 src/TNL/Matrices/DenseMatrixRowView.hpp | 71 ++++++++++++++++++++++++
 src/TNL/Matrices/SparseMatrix.h         |  2 +-
 6 files changed, 169 insertions(+), 55 deletions(-)
 create mode 100644 src/TNL/Matrices/DenseMatrixRowView.h
 create mode 100644 src/TNL/Matrices/DenseMatrixRowView.hpp

diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h
index 429615647..c197c7010 100644
--- a/src/TNL/Containers/Segments/Ellpack.h
+++ b/src/TNL/Containers/Segments/Ellpack.h
@@ -124,7 +124,7 @@ class Ellpack
 };
 
       } // namespace Segements
-   }  // namespace Conatiners
+   }  // namespace Containers
 } // namespace TNL
 
 #include <TNL/Containers/Segments/Ellpack.hpp>
diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index cff1d57b4..c72b7edfa 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -12,8 +12,8 @@
 
 #include <TNL/Allocators/Default.h>
 #include <TNL/Devices/Host.h>
+#include <TNL/Matrices/DenseMatrixRowView.h>
 #include <TNL/Matrices/Matrix.h>
-#include <TNL/Matrices/DenseRow.h>
 #include <TNL/Containers/Segments/Ellpack.h>
 
 namespace TNL {
@@ -42,11 +42,16 @@ public:
    using RealType = Real;
    using DeviceType = Device;
    using IndexType = Index;
-   using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector;
-   using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView;
    using BaseType = Matrix< Real, Device, Index >;
-   using MatrixRow = DenseRow< Real, Index >;
+   using ValuesType = typename BaseType::ValuesVector;
+   using ValuesViewType = typename ValuesType::ViewType;
    using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder >;
+   using SegmentViewType = typename SegmentsType::SegmentViewType;
+   using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >;
+
+   // TODO: remove this
+   using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector;
+   using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView;
 
    template< typename _Real = Real,
              typename _Device = Device,
@@ -81,6 +86,13 @@ public:
 
    void reset();
 
+   __cuda_callable__
+   const RowView getRow( const IndexType& rowIdx ) const;
+
+   __cuda_callable__
+   RowView getRow( const IndexType& rowIdx );
+
+
    void setValue( const RealType& v );
 
    __cuda_callable__
@@ -103,11 +115,11 @@ public:
    Real getElement( const IndexType row,
                     const IndexType column ) const;
 
-   __cuda_callable__
+   /*__cuda_callable__
    MatrixRow getRow( const IndexType rowIndex );
 
    __cuda_callable__
-   const MatrixRow getRow( const IndexType rowIndex ) const;
+   const MatrixRow getRow( const IndexType rowIndex ) const;*/
 
    template< typename Vector >
    __cuda_callable__
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index bed7a37b7..bd0614ad0 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -158,6 +158,32 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setValue( const
    this->values = value;
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__ auto
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getRow( const IndexType& rowIdx ) const -> const RowView
+{
+   TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." );
+   return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__ auto
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getRow( const IndexType& rowIdx ) -> RowView
+{
+   TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." );
+   return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView() );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -236,46 +262,6 @@ Real Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElement( con
    return this->values.getElement( this->getElementIndex( row, column ) );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
-__cuda_callable__
-typename Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow
-Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
-getRow( const IndexType rowIndex )
-{
-   if( std::is_same< Device, Devices::Host >::value )
-      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, 0 ) ],
-                        this->columns,
-                        1 );
-   if( std::is_same< Device, Devices::Cuda >::value )
-      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, 0 ) ],
-                        this->columns,
-                        this->rows );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
-__cuda_callable__
-const typename Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow
-Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
-getRow( const IndexType rowIndex ) const
-{
-   if( std::is_same< Device, Devices::Host >::value )
-      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, 0 ) ],
-                        this->columns,
-                        1 );
-   if( std::is_same< Device, Devices::Cuda >::value )
-      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, 0 ) ],
-                        this->columns,
-                        this->rows );
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -898,13 +884,6 @@ Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementInde
                                                               const IndexType column ) const
 {
    return this->segments.getGlobalIndex( row, column );
-   /*TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value ||
-          std::is_same< Device, Devices::Cuda >::value ), )
-   if( std::is_same< Device, Devices::Host >::value )
-      return row * this->columns + column;
-   if( std::is_same< Device, Devices::Cuda >::value )
-      return column * this->rows + row;
-   return -1;*/
 }
 
 template<>
diff --git a/src/TNL/Matrices/DenseMatrixRowView.h b/src/TNL/Matrices/DenseMatrixRowView.h
new file mode 100644
index 000000000..84c6b141c
--- /dev/null
+++ b/src/TNL/Matrices/DenseMatrixRowView.h
@@ -0,0 +1,52 @@
+/***************************************************************************
+                          DenseMatrixRowView.h -  description
+                             -------------------
+    begin                : Jan 3, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Matrices {
+
+template< typename SegmentView,
+          typename ValuesView >
+class DenseMatrixRowView
+{
+   public:
+
+      using RealType = typename ValuesView::RealType;
+      using SegmentViewType = SegmentView;
+      using IndexType = typename SegmentViewType::IndexType;
+      using ValuesViewType = ValuesView;
+
+      __cuda_callable__
+      DenseMatrixRowView( const SegmentViewType& segmentView,
+                          const ValuesViewType& values );
+
+      __cuda_callable__
+      IndexType getSize() const;
+
+      __cuda_callable__
+      const RealType& getValue( const IndexType column ) const;
+
+      __cuda_callable__
+      RealType& getValue( const IndexType column );
+
+      __cuda_callable__
+      void setElement( const IndexType column,
+                       const RealType& value );
+   protected:
+
+      SegmentViewType segmentView;
+
+      ValuesViewType values;
+};
+   } // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/DenseMatrixRowView.hpp>
diff --git a/src/TNL/Matrices/DenseMatrixRowView.hpp b/src/TNL/Matrices/DenseMatrixRowView.hpp
new file mode 100644
index 000000000..1962a4d9a
--- /dev/null
+++ b/src/TNL/Matrices/DenseMatrixRowView.hpp
@@ -0,0 +1,71 @@
+/***************************************************************************
+                          DenseMatrixRowView.hpp -  description
+                             -------------------
+    begin                : Jan 3, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/DenseMatrixRowView.h>
+
+namespace TNL {
+   namespace Matrices {
+
+template< typename SegmentView,
+          typename ValuesView >
+__cuda_callable__
+DenseMatrixRowView< SegmentView, ValuesView >::
+DenseMatrixRowView( const SegmentViewType& segmentView,
+                     const ValuesViewType& values )
+ : segmentView( segmentView ), values( values )
+{
+}
+
+template< typename SegmentView,
+          typename ValuesView >
+__cuda_callable__ auto
+DenseMatrixRowView< SegmentView, ValuesView >::
+getSize() const -> IndexType
+{
+   return segmentView.getSize();
+}
+
+template< typename SegmentView,
+          typename ValuesView >
+__cuda_callable__ auto
+DenseMatrixRowView< SegmentView, ValuesView >::
+getValue( const IndexType column ) const -> const RealType&
+{
+   TNL_ASSERT_LT( column, this->getSize(), "Column index exceeds matrix row size." );
+   return values[ segmentView.getGlobalIndex( column ) ];
+}
+
+template< typename SegmentView,
+          typename ValuesView >
+__cuda_callable__ auto
+DenseMatrixRowView< SegmentView, ValuesView >::
+getValue( const IndexType column ) -> RealType&
+{
+   TNL_ASSERT_LT( column, this->getSize(), "Column index exceeds matrix row size." );
+   return values[ segmentView.getGlobalIndex( column ) ];
+}
+
+template< typename SegmentView,
+          typename ValuesView >
+__cuda_callable__ void 
+DenseMatrixRowView< SegmentView, ValuesView >::
+setElement( const IndexType column,
+            const RealType& value )
+{
+   TNL_ASSERT_LT( column, this->getSize(), "Column index exceeds matrix row size." );
+   const IndexType globalIdx = segmentView.getGlobalIndex( column );
+   values[ globalIdx ] = value;
+}
+
+
+   } // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 8f96af169..c50f71612 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -14,8 +14,8 @@
 #include <TNL/Matrices/MatrixType.h>
 #include <TNL/Allocators/Default.h>
 #include <TNL/Containers/Segments/CSR.h>
-#include <TNL/Matrices/SparseMatrixView.h>
 #include <TNL/Matrices/SparseMatrixRowView.h>
+#include <TNL/Matrices/SparseMatrixView.h>
 
 namespace TNL {
 namespace Matrices {
-- 
GitLab


From 873b9a9c296f632b4f87eee8df8bbf801731d9d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 3 Jan 2020 16:25:20 +0100
Subject: [PATCH 061/179] Added methods forRows and rowsReduction to dense
 matrix.

---
 src/TNL/Matrices/Dense.h   | 230 ++++++++++++++++++++-----------------
 src/TNL/Matrices/Dense.hpp | 126 +++++++++++++++++++-
 2 files changed, 247 insertions(+), 109 deletions(-)

diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index c72b7edfa..553ecc01d 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -29,156 +29,172 @@ template< typename Real = double,
           typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
 class Dense : public Matrix< Real, Device, Index >
 {
-private:
-   // convenient template alias for controlling the selection of copy-assignment operator
-   template< typename Device2 >
-   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
+   private:
+      // convenient template alias for controlling the selection of copy-assignment operator
+      template< typename Device2 >
+      using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
 
-   // friend class will be needed for templated assignment operators
-   //template< typename Real2, typename Device2, typename Index2 >
-   //friend class Dense;
+      // friend class will be needed for templated assignment operators
+      //template< typename Real2, typename Device2, typename Index2 >
+      //friend class Dense;
 
-public:
-   using RealType = Real;
-   using DeviceType = Device;
-   using IndexType = Index;
-   using BaseType = Matrix< Real, Device, Index >;
-   using ValuesType = typename BaseType::ValuesVector;
-   using ValuesViewType = typename ValuesType::ViewType;
-   using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder >;
-   using SegmentViewType = typename SegmentsType::SegmentViewType;
-   using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >;
+   public:
+      using RealType = Real;
+      using DeviceType = Device;
+      using IndexType = Index;
+      using BaseType = Matrix< Real, Device, Index >;
+      using ValuesType = typename BaseType::ValuesVector;
+      using ValuesViewType = typename ValuesType::ViewType;
+      using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder >;
+      using SegmentViewType = typename SegmentsType::SegmentViewType;
+      using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >;
 
-   // TODO: remove this
-   using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector;
-   using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView;
+      // TODO: remove this
+      using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector;
+      using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView;
 
-   template< typename _Real = Real,
-             typename _Device = Device,
-             typename _Index = Index >
-   using Self = Dense< _Real, _Device, _Index >;
+      template< typename _Real = Real,
+                typename _Device = Device,
+                typename _Index = Index >
+      using Self = Dense< _Real, _Device, _Index >;
 
-   Dense();
+      Dense();
 
-   static String getSerializationType();
+      static String getSerializationType();
 
-   virtual String getSerializationTypeVirtual() const;
+      virtual String getSerializationTypeVirtual() const;
 
-   void setDimensions( const IndexType rows,
-                       const IndexType columns );
+      void setDimensions( const IndexType rows,
+                          const IndexType columns );
 
-   template< typename Matrix >
-   void setLike( const Matrix& matrix );
+      template< typename Matrix >
+      void setLike( const Matrix& matrix );
 
-   /****
-    * This method is only for the compatibility with the sparse matrices.
-    */
-   void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
+      /****
+       * This method is only for the compatibility with the sparse matrices.
+       */
+      void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
-   [[deprecated]]
-   IndexType getRowLength( const IndexType row ) const;
+      [[deprecated]]
+      IndexType getRowLength( const IndexType row ) const;
 
-   IndexType getMaxRowLength() const;
+      IndexType getMaxRowLength() const;
 
-   IndexType getNumberOfMatrixElements() const;
+      IndexType getNumberOfMatrixElements() const;
 
-   IndexType getNumberOfNonzeroMatrixElements() const;
+      IndexType getNumberOfNonzeroMatrixElements() const;
 
-   void reset();
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
 
-   __cuda_callable__
-   const RowView getRow( const IndexType& rowIdx ) const;
 
-   __cuda_callable__
-   RowView getRow( const IndexType& rowIdx );
+      void reset();
 
+      __cuda_callable__
+      const RowView getRow( const IndexType& rowIdx ) const;
 
-   void setValue( const RealType& v );
+      __cuda_callable__
+      RowView getRow( const IndexType& rowIdx );
 
-   __cuda_callable__
-   Real& operator()( const IndexType row,
-                     const IndexType column );
 
-   __cuda_callable__
-   const Real& operator()( const IndexType row,
-                           const IndexType column ) const;
+      void setValue( const RealType& v );
 
-   bool setElement( const IndexType row,
-                    const IndexType column,
-                    const RealType& value );
+      __cuda_callable__
+      Real& operator()( const IndexType row,
+                        const IndexType column );
 
-   bool addElement( const IndexType row,
-                    const IndexType column,
-                    const RealType& value,
-                    const RealType& thisElementMultiplicator = 1.0 );
+      __cuda_callable__
+      const Real& operator()( const IndexType row,
+                              const IndexType column ) const;
 
-   Real getElement( const IndexType row,
-                    const IndexType column ) const;
+      bool setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
 
-   /*__cuda_callable__
-   MatrixRow getRow( const IndexType rowIndex );
+      bool addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator = 1.0 );
 
-   __cuda_callable__
-   const MatrixRow getRow( const IndexType rowIndex ) const;*/
+      Real getElement( const IndexType row,
+                       const IndexType column ) const;
 
-   template< typename Vector >
-   __cuda_callable__
-   typename Vector::RealType rowVectorProduct( const IndexType row,
-                                               const Vector& vector ) const;
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
-   template< typename InVector, typename OutVector >
-   void vectorProduct( const InVector& inVector,
-                       OutVector& outVector ) const;
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
-   template< typename Matrix >
-   void addMatrix( const Matrix& matrix,
-                   const RealType& matrixMultiplicator = 1.0,
-                   const RealType& thisMatrixMultiplicator = 1.0 );
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
 
-   template< typename Matrix1, typename Matrix2, int tileDim = 32 >
-   void getMatrixProduct( const Matrix1& matrix1,
-                       const Matrix2& matrix2,
-                       const RealType& matrix1Multiplicator = 1.0,
-                       const RealType& matrix2Multiplicator = 1.0 );
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
 
-   template< typename Matrix, int tileDim = 32 >
-   void getTransposition( const Matrix& matrix,
-                          const RealType& matrixMultiplicator = 1.0 );
+      template< typename Function >
+      void forAllRows( Function& function ) const;
 
-   template< typename Vector1, typename Vector2 >
-   void performSORIteration( const Vector1& b,
-                             const IndexType row,
-                             Vector2& x,
-                             const RealType& omega = 1.0 ) const;
+      template< typename Function >
+      void forAllRows( Function& function );
 
-   // copy assignment
-   Dense& operator=( const Dense& matrix );
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
 
-   // cross-device copy assignment
-   template< typename Real2, typename Device2, typename Index2,
-             typename = typename Enabler< Device2 >::type >
-   Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix );
+      template< typename InVector, typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector ) const;
 
-   void save( const String& fileName ) const;
+      template< typename Matrix >
+      void addMatrix( const Matrix& matrix,
+                      const RealType& matrixMultiplicator = 1.0,
+                      const RealType& thisMatrixMultiplicator = 1.0 );
 
-   void load( const String& fileName );
+      template< typename Matrix1, typename Matrix2, int tileDim = 32 >
+      void getMatrixProduct( const Matrix1& matrix1,
+                          const Matrix2& matrix2,
+                          const RealType& matrix1Multiplicator = 1.0,
+                          const RealType& matrix2Multiplicator = 1.0 );
 
-   void save( File& file ) const;
+      template< typename Matrix, int tileDim = 32 >
+      void getTransposition( const Matrix& matrix,
+                             const RealType& matrixMultiplicator = 1.0 );
 
-   void load( File& file );
+      template< typename Vector1, typename Vector2 >
+      void performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
 
-   void print( std::ostream& str ) const;
+      // copy assignment
+      Dense& operator=( const Dense& matrix );
 
-protected:
+      // cross-device copy assignment
+      template< typename Real2, typename Device2, typename Index2,
+                typename = typename Enabler< Device2 >::type >
+      Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix );
 
-   __cuda_callable__
-   IndexType getElementIndex( const IndexType row,
-                              const IndexType column ) const;
+      void save( const String& fileName ) const;
+
+      void load( const String& fileName );
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+      void print( std::ostream& str ) const;
+
+   protected:
+
+      __cuda_callable__
+      IndexType getElementIndex( const IndexType row,
+                                 const IndexType column ) const;
 
-   typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode;
-   friend class DenseDeviceDependentCode< DeviceType >;
+      typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode;
+      friend class DenseDeviceDependentCode< DeviceType >;
 
-   SegmentsType segments;
+      SegmentsType segments;
 };
 
 } // namespace Matrices
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index bd0614ad0..680fa3ed2 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -94,6 +94,31 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
    this->setDimensions( rowLengths.getSize(), max( rowLengths ) );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Vector >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getCompressedRowLengths( Vector& rowLengths ) const
+{
+   rowLengths.setSize( this->getRows() );
+   rowLengths = 0;
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   this->allRowsReduction( fetch, reduce, keep, 0 );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -256,12 +281,109 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-Real Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElement( const IndexType row,
-                                                        const IndexType column ) const
+Real 
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getElement( const IndexType row,
+            const IndexType column ) const
 {
    return this->values.getElement( this->getElementIndex( row, column ) );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const
+{
+   const auto values_view = this->values.getConstView();
+   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
+         return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
+      return zero;
+   };
+   this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forRows( IndexType first, IndexType last, Function& function ) const
+{
+   const auto values_view = this->values.getConstView();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool {
+      function( rowIdx, columnIdx, values_view[ globalIdx ] );
+      return true;
+   };
+   this->segments.forSegments( first, last, f );
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forRows( IndexType first, IndexType last, Function& function )
+{
+   auto values_view = this->values.getView();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool {
+      function( rowIdx, columnIdx, values_view[ globalIdx ] );
+      return true;
+   };
+   this->segments.forSegments( first, last, f );
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forAllRows( Function& function ) const
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forAllRows( Function& function )
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
-- 
GitLab


From 3f278b0e1925829b50a81afc9368acdcb2de368f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 3 Jan 2020 21:25:42 +0100
Subject: [PATCH 062/179] Fixing dense matrix unit tests.

---
 src/TNL/Matrices/Dense.h                 |  11 +-
 src/TNL/Matrices/Dense.hpp               |  11 +
 src/UnitTests/Matrices/DenseMatrixTest.h | 419 ++++++++++++++---------
 3 files changed, 266 insertions(+), 175 deletions(-)

diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index 553ecc01d..51308280d 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -45,7 +45,7 @@ class Dense : public Matrix< Real, Device, Index >
       using BaseType = Matrix< Real, Device, Index >;
       using ValuesType = typename BaseType::ValuesVector;
       using ValuesViewType = typename ValuesType::ViewType;
-      using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder >;
+      using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >;
       using SegmentViewType = typename SegmentsType::SegmentViewType;
       using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >;
 
@@ -60,6 +60,8 @@ class Dense : public Matrix< Real, Device, Index >
 
       Dense();
 
+      Dense( const IndexType rows, const IndexType columns );
+
       static String getSerializationType();
 
       virtual String getSerializationTypeVirtual() const;
@@ -75,6 +77,9 @@ class Dense : public Matrix< Real, Device, Index >
        */
       void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
+
       [[deprecated]]
       IndexType getRowLength( const IndexType row ) const;
 
@@ -84,10 +89,6 @@ class Dense : public Matrix< Real, Device, Index >
 
       IndexType getNumberOfNonzeroMatrixElements() const;
 
-      template< typename Vector >
-      void getCompressedRowLengths( Vector& rowLengths ) const;
-
-
       void reset();
 
       __cuda_callable__
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 680fa3ed2..ebf2c03b9 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -26,6 +26,17 @@ Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::Dense()
 {
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+Dense( const IndexType rows, const IndexType columns )
+{
+   this->setDimensions( rows, columns );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index fc6ea6bd2..2184e6360 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -12,6 +12,8 @@
 #include <TNL/Matrices/Matrix.h>
 #include <TNL/Matrices/Dense.h>
 #include <TNL/Containers/Array.h>
+#include <TNL/Pointers/SharedPointer.h>
+#include <TNL/Pointers/SmartPointersRegister.h>
 
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/VectorView.h>
@@ -92,6 +94,58 @@ void test_SetLike()
     EXPECT_EQ( m1.getColumns(), m2.getColumns() );
 }
 
+template< typename Matrix >
+void test_GetCompressedRowLengths()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   const IndexType rows = 10;
+   const IndexType cols = 11;
+
+    Matrix m( rows, cols );
+
+    // Insert values into the rows.
+    RealType value = 1;
+
+    for( IndexType i = 0; i < 3; i++ )      // 0th row
+        m.setElement( 0, i, value++ );
+
+    for( IndexType i = 0; i < 3; i++ )      // 1st row
+        m.setElement( 1, i, value++ );
+
+    for( IndexType i = 0; i < 1; i++ )      // 2nd row
+        m.setElement( 2, i, value++ );
+
+    for( IndexType i = 0; i < 2; i++ )      // 3rd row
+        m.setElement( 3, i, value++ );
+
+    for( IndexType i = 0; i < 3; i++ )      // 4th row
+        m.setElement( 4, i, value++ );
+
+    for( IndexType i = 0; i < 4; i++ )      // 5th row
+        m.setElement( 5, i, value++ );
+
+    for( IndexType i = 0; i < 5; i++ )      // 6th row
+        m.setElement( 6, i, value++ );
+
+    for( IndexType i = 0; i < 6; i++ )      // 7th row
+        m.setElement( 7, i, value++ );
+
+    for( IndexType i = 0; i < 7; i++ )      // 8th row
+        m.setElement( 8, i, value++ );
+
+    for( IndexType i = 0; i < 8; i++ )      // 9th row
+        m.setElement( 9, i, value++ );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths = 0;
+   m.getCompressedRowLengths( rowLengths );
+   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 };
+   EXPECT_EQ( rowLengths, correctRowLengths );
+}
+
 template< typename Matrix >
 void test_GetRowLength()
 {
@@ -508,142 +562,147 @@ void test_AddElement()
 template< typename Matrix >
 void test_SetRow()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 3x7 dense matrix:
- *
- *    /  1  2  3  4  5  6  7 \
- *    |  8  9 10 11 12 13 14 |
- *    \ 15 16 17 18 19 20 21 /
- */
-    const IndexType rows = 3;
-    const IndexType cols = 7;
-    
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
-    
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++ )
-            m.setElement( i, j, value++ );       
-    
-    RealType row1 [ 5 ] = { 11, 11, 11, 11, 11 }; IndexType colIndexes1 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row2 [ 5 ] = { 22, 22, 22, 22, 22 }; IndexType colIndexes2 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row3 [ 5 ] = { 33, 33, 33, 33, 33 }; IndexType colIndexes3 [ 5 ] = { 2, 3, 4, 5, 6 };
-    
-    IndexType row = 0;
-    IndexType elements = 5;
-    
-    // TODO: Fix this
-    /*m.setRow( row++, colIndexes1, row1, elements );
-    m.setRow( row++, colIndexes2, row2, elements );
-    m.setRow( row++, colIndexes3, row3, elements );*/
-    
-    EXPECT_EQ( m.getElement( 0, 0 ), 11 );
-    EXPECT_EQ( m.getElement( 0, 1 ), 11 );
-    EXPECT_EQ( m.getElement( 0, 2 ), 11 );
-    EXPECT_EQ( m.getElement( 0, 3 ), 11 );
-    EXPECT_EQ( m.getElement( 0, 4 ), 11 );
-    EXPECT_EQ( m.getElement( 0, 5 ),  6 );
-    EXPECT_EQ( m.getElement( 0, 6 ),  7 );
-    
-    EXPECT_EQ( m.getElement( 1, 0 ), 22 );
-    EXPECT_EQ( m.getElement( 1, 1 ), 22 );
-    EXPECT_EQ( m.getElement( 1, 2 ), 22 );
-    EXPECT_EQ( m.getElement( 1, 3 ), 22 );
-    EXPECT_EQ( m.getElement( 1, 4 ), 22 );
-    EXPECT_EQ( m.getElement( 1, 5 ), 13 );
-    EXPECT_EQ( m.getElement( 1, 6 ), 14 );
-    
-    EXPECT_EQ( m.getElement( 2, 0 ), 15 );
-    EXPECT_EQ( m.getElement( 2, 1 ), 16 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 33 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 33 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 33 );
-    EXPECT_EQ( m.getElement( 2, 5 ), 33 );
-    EXPECT_EQ( m.getElement( 2, 6 ), 33 );
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 3x7 dense matrix:
+    *
+    *    /  1  2  3  4  5  6  7 \
+    *    |  8  9 10 11 12 13 14 |
+    *    \ 15 16 17 18 19 20 21 /
+    */
+   const IndexType rows = 3;
+   const IndexType cols = 7;
+
+   TNL::Pointers::SharedPointer< Matrix > m;
+   m->reset();
+   m->setDimensions( rows, cols );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+         m->setElement( i, j, value++ );
+
+   Matrix* m_ptr = &m.template modifyData< DeviceType >();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      RealType values[ 3 ][ 5 ] {
+         { 11, 11, 11, 11, 11 },
+         { 22, 22, 22, 22, 22 },
+         { 33, 33, 33, 33, 33 } };
+      IndexType columnIndexes[ 3 ][ 5 ] {
+         { 0, 1, 2, 3, 4 },
+         { 0, 1, 2, 3, 4 },
+         { 2, 3, 4, 5, 6 } };
+      auto row = m_ptr->getRow( rowIdx );
+      for( IndexType i = 0; i < 5; i++ )
+         row.setElement( columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] );
+   };
+   TNL::Pointers::synchronizeSmartPointersOnDevice< DeviceType >();
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f );
+
+   EXPECT_EQ( m->getElement( 0, 0 ), 11 );
+   EXPECT_EQ( m->getElement( 0, 1 ), 11 );
+   EXPECT_EQ( m->getElement( 0, 2 ), 11 );
+   EXPECT_EQ( m->getElement( 0, 3 ), 11 );
+   EXPECT_EQ( m->getElement( 0, 4 ), 11 );
+   EXPECT_EQ( m->getElement( 0, 5 ),  6 );
+   EXPECT_EQ( m->getElement( 0, 6 ),  7 );
+
+   EXPECT_EQ( m->getElement( 1, 0 ), 22 );
+   EXPECT_EQ( m->getElement( 1, 1 ), 22 );
+   EXPECT_EQ( m->getElement( 1, 2 ), 22 );
+   EXPECT_EQ( m->getElement( 1, 3 ), 22 );
+   EXPECT_EQ( m->getElement( 1, 4 ), 22 );
+   EXPECT_EQ( m->getElement( 1, 5 ), 13 );
+   EXPECT_EQ( m->getElement( 1, 6 ), 14 );
+
+   EXPECT_EQ( m->getElement( 2, 0 ), 15 );
+   EXPECT_EQ( m->getElement( 2, 1 ), 16 );
+   EXPECT_EQ( m->getElement( 2, 2 ), 33 );
+   EXPECT_EQ( m->getElement( 2, 3 ), 33 );
+   EXPECT_EQ( m->getElement( 2, 4 ), 33 );
+   EXPECT_EQ( m->getElement( 2, 5 ), 33 );
+   EXPECT_EQ( m->getElement( 2, 6 ), 33 );
 }
 
 template< typename Matrix >
 void test_AddRow()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 6x5 dense matrix:
- *
- *    /  1  2  3  4  5 \
- *    |  6  7  8  9 10 |
- *    | 11 12 13 14 15 |
- *    | 16 17 18 19 20 |
- *    | 21 22 23 24 25 |
- *    \ 26 27 28 29 30 /
- */
-    const IndexType rows = 6;
-    const IndexType cols = 5;
-    
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
-    
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++ )
-            m.setElement( i, j, value++ );
-    
-    // Check the added elements
-    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
-    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
-    EXPECT_EQ( m.getElement( 0, 2 ),  3 );
-    EXPECT_EQ( m.getElement( 0, 3 ),  4 );
-    EXPECT_EQ( m.getElement( 0, 4 ),  5 );
-    
-    EXPECT_EQ( m.getElement( 1, 0 ),  6 );
-    EXPECT_EQ( m.getElement( 1, 1 ),  7 );
-    EXPECT_EQ( m.getElement( 1, 2 ),  8 );
-    EXPECT_EQ( m.getElement( 1, 3 ),  9 );
-    EXPECT_EQ( m.getElement( 1, 4 ), 10 );
-    
-    EXPECT_EQ( m.getElement( 2, 0 ), 11 );
-    EXPECT_EQ( m.getElement( 2, 1 ), 12 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 13 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 14 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 15 );
-    
-    EXPECT_EQ( m.getElement( 3, 0 ), 16 );
-    EXPECT_EQ( m.getElement( 3, 1 ), 17 );
-    EXPECT_EQ( m.getElement( 3, 2 ), 18 );
-    EXPECT_EQ( m.getElement( 3, 3 ), 19 );
-    EXPECT_EQ( m.getElement( 3, 4 ), 20 );
-    
-    EXPECT_EQ( m.getElement( 4, 0 ), 21 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 22 );
-    EXPECT_EQ( m.getElement( 4, 2 ), 23 );
-    EXPECT_EQ( m.getElement( 4, 3 ), 24 );
-    EXPECT_EQ( m.getElement( 4, 4 ), 25 );
-    
-    EXPECT_EQ( m.getElement( 5, 0 ), 26 );
-    EXPECT_EQ( m.getElement( 5, 1 ), 27 );
-    EXPECT_EQ( m.getElement( 5, 2 ), 28 );
-    EXPECT_EQ( m.getElement( 5, 3 ), 29 );
-    EXPECT_EQ( m.getElement( 5, 4 ), 30 );
-    
-    // Add new elements to the old elements with a multiplying factor applied to the old elements.
-/*
- * The following setup results in the following 6x5 sparse matrix:
- *
- *    /  3  6  9 12 15 \
- *    | 18 21 24 27 30 |
- *    | 33 36 39 42 45 |
- *    | 48 51 54 57 60 |
- *    | 63 66 69 72 75 |
- *    \ 78 81 84 87 90 /
- */
-    
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   /*
+    * Sets up the following 6x5 dense matrix:
+    *
+    *    /  1  2  3  4  5 \
+    *    |  6  7  8  9 10 |
+    *    | 11 12 13 14 15 |
+    *    | 16 17 18 19 20 |
+    *    | 21 22 23 24 25 |
+    *    \ 26 27 28 29 30 /
+    */
+
+   const IndexType rows = 6;
+   const IndexType cols = 5;
+
+   TNL::Pointers::SharedPointer< Matrix > m( rows, cols );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+         m->setElement( i, j, value++ );
+
+   // Check the added elements
+   EXPECT_EQ( m->getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m->getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m->getElement( 0, 2 ),  3 );
+   EXPECT_EQ( m->getElement( 0, 3 ),  4 );
+   EXPECT_EQ( m->getElement( 0, 4 ),  5 );
+
+   EXPECT_EQ( m->getElement( 1, 0 ),  6 );
+   EXPECT_EQ( m->getElement( 1, 1 ),  7 );
+   EXPECT_EQ( m->getElement( 1, 2 ),  8 );
+   EXPECT_EQ( m->getElement( 1, 3 ),  9 );
+   EXPECT_EQ( m->getElement( 1, 4 ), 10 );
+
+   EXPECT_EQ( m->getElement( 2, 0 ), 11 );
+   EXPECT_EQ( m->getElement( 2, 1 ), 12 );
+   EXPECT_EQ( m->getElement( 2, 2 ), 13 );
+   EXPECT_EQ( m->getElement( 2, 3 ), 14 );
+   EXPECT_EQ( m->getElement( 2, 4 ), 15 );
+
+   EXPECT_EQ( m->getElement( 3, 0 ), 16 );
+   EXPECT_EQ( m->getElement( 3, 1 ), 17 );
+   EXPECT_EQ( m->getElement( 3, 2 ), 18 );
+   EXPECT_EQ( m->getElement( 3, 3 ), 19 );
+   EXPECT_EQ( m->getElement( 3, 4 ), 20 );
+
+   EXPECT_EQ( m->getElement( 4, 0 ), 21 );
+   EXPECT_EQ( m->getElement( 4, 1 ), 22 );
+   EXPECT_EQ( m->getElement( 4, 2 ), 23 );
+   EXPECT_EQ( m->getElement( 4, 3 ), 24 );
+   EXPECT_EQ( m->getElement( 4, 4 ), 25 );
+
+   EXPECT_EQ( m->getElement( 5, 0 ), 26 );
+   EXPECT_EQ( m->getElement( 5, 1 ), 27 );
+   EXPECT_EQ( m->getElement( 5, 2 ), 28 );
+   EXPECT_EQ( m->getElement( 5, 3 ), 29 );
+   EXPECT_EQ( m->getElement( 5, 4 ), 30 );
+
+   // Add new elements to the old elements with a multiplying factor applied to the old elements.
+   /*
+    * The following setup results in the following 6x5 sparse matrix:
+    *
+    *    /  3  6  9 12 15 \
+    *    | 18 21 24 27 30 |
+    *    | 33 36 39 42 45 |
+    *    | 48 51 54 57 60 |
+    *    | 63 66 69 72 75 |
+    *    \ 78 81 84 87 90 /
+    */
+
     RealType row0 [ 5 ] = { 11, 11, 11, 11, 0 }; IndexType colIndexes0 [ 5 ] = { 0, 1, 2, 3, 4 };
     RealType row1 [ 5 ] = { 22, 22, 22, 22, 0 }; IndexType colIndexes1 [ 5 ] = { 0, 1, 2, 3, 4 };
     RealType row2 [ 5 ] = { 33, 33, 33, 33, 0 }; IndexType colIndexes2 [ 5 ] = { 0, 1, 2, 3, 4 };
@@ -662,42 +721,62 @@ void test_AddRow()
     m.addRow( row++, colIndexes3, row3, elements, thisRowMultiplicator++ );
     m.addRow( row++, colIndexes4, row4, elements, thisRowMultiplicator++ );
     m.addRow( row++, colIndexes5, row5, elements, thisRowMultiplicator++ );*/
-    
-    EXPECT_EQ( m.getElement( 0, 0 ),  11 );
-    EXPECT_EQ( m.getElement( 0, 1 ),  11 );
-    EXPECT_EQ( m.getElement( 0, 2 ),  11 );
-    EXPECT_EQ( m.getElement( 0, 3 ),  11 );
-    EXPECT_EQ( m.getElement( 0, 4 ),   0 );
-    
-    EXPECT_EQ( m.getElement( 1, 0 ),  28 );
-    EXPECT_EQ( m.getElement( 1, 1 ),  29 );
-    EXPECT_EQ( m.getElement( 1, 2 ),  30 );
-    EXPECT_EQ( m.getElement( 1, 3 ),  31 );
-    EXPECT_EQ( m.getElement( 1, 4 ),  10 );
-    
-    EXPECT_EQ( m.getElement( 2, 0 ),  55 );
-    EXPECT_EQ( m.getElement( 2, 1 ),  57 );
-    EXPECT_EQ( m.getElement( 2, 2 ),  59 );
-    EXPECT_EQ( m.getElement( 2, 3 ),  61 );
-    EXPECT_EQ( m.getElement( 2, 4 ),  30 );
-    
-    EXPECT_EQ( m.getElement( 3, 0 ),  92 );
-    EXPECT_EQ( m.getElement( 3, 1 ),  95 );
-    EXPECT_EQ( m.getElement( 3, 2 ),  98 );
-    EXPECT_EQ( m.getElement( 3, 3 ), 101 );
-    EXPECT_EQ( m.getElement( 3, 4 ),  60 );
-    
-    EXPECT_EQ( m.getElement( 4, 0 ), 139 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 143 );
-    EXPECT_EQ( m.getElement( 4, 2 ), 147 );
-    EXPECT_EQ( m.getElement( 4, 3 ), 151 );
-    EXPECT_EQ( m.getElement( 4, 4 ), 100 );
-    
-    EXPECT_EQ( m.getElement( 5, 0 ), 196 );
-    EXPECT_EQ( m.getElement( 5, 1 ), 201 );
-    EXPECT_EQ( m.getElement( 5, 2 ), 206 );
-    EXPECT_EQ( m.getElement( 5, 3 ), 211 );
-    EXPECT_EQ( m.getElement( 5, 4 ), 150 );
+
+   Matrix* m_ptr = &m.template modifyData< DeviceType >();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      RealType values[ 6 ][ 5 ] {
+         { 11, 11, 11, 11, 0 },
+         { 22, 22, 22, 22, 0 },
+         { 33, 33, 33, 33, 0 },
+         { 44, 44, 44, 44, 0 },
+         { 55, 55, 55, 55, 0 },
+         { 66, 66, 66, 66, 0 } };
+      auto row = m_ptr->getRow( rowIdx );
+      for( IndexType i = 0; i < 5; i++ )
+      {
+         RealType& val = row.getValue( i );
+         val = rowIdx * val + values[ rowIdx ][ i ];
+      }
+   };
+   TNL::Pointers::synchronizeSmartPointersOnDevice< DeviceType >();
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f );
+
+    
+    EXPECT_EQ( m->getElement( 0, 0 ),  11 );
+    EXPECT_EQ( m->getElement( 0, 1 ),  11 );
+    EXPECT_EQ( m->getElement( 0, 2 ),  11 );
+    EXPECT_EQ( m->getElement( 0, 3 ),  11 );
+    EXPECT_EQ( m->getElement( 0, 4 ),   0 );
+    
+    EXPECT_EQ( m->getElement( 1, 0 ),  28 );
+    EXPECT_EQ( m->getElement( 1, 1 ),  29 );
+    EXPECT_EQ( m->getElement( 1, 2 ),  30 );
+    EXPECT_EQ( m->getElement( 1, 3 ),  31 );
+    EXPECT_EQ( m->getElement( 1, 4 ),  10 );
+    
+    EXPECT_EQ( m->getElement( 2, 0 ),  55 );
+    EXPECT_EQ( m->getElement( 2, 1 ),  57 );
+    EXPECT_EQ( m->getElement( 2, 2 ),  59 );
+    EXPECT_EQ( m->getElement( 2, 3 ),  61 );
+    EXPECT_EQ( m->getElement( 2, 4 ),  30 );
+    
+    EXPECT_EQ( m->getElement( 3, 0 ),  92 );
+    EXPECT_EQ( m->getElement( 3, 1 ),  95 );
+    EXPECT_EQ( m->getElement( 3, 2 ),  98 );
+    EXPECT_EQ( m->getElement( 3, 3 ), 101 );
+    EXPECT_EQ( m->getElement( 3, 4 ),  60 );
+    
+    EXPECT_EQ( m->getElement( 4, 0 ), 139 );
+    EXPECT_EQ( m->getElement( 4, 1 ), 143 );
+    EXPECT_EQ( m->getElement( 4, 2 ), 147 );
+    EXPECT_EQ( m->getElement( 4, 3 ), 151 );
+    EXPECT_EQ( m->getElement( 4, 4 ), 100 );
+    
+    EXPECT_EQ( m->getElement( 5, 0 ), 196 );
+    EXPECT_EQ( m->getElement( 5, 1 ), 201 );
+    EXPECT_EQ( m->getElement( 5, 2 ), 206 );
+    EXPECT_EQ( m->getElement( 5, 3 ), 211 );
+    EXPECT_EQ( m->getElement( 5, 4 ), 150 );
 }
 
 template< typename Matrix >
@@ -1352,12 +1431,12 @@ TYPED_TEST( MatrixTest, addRowTest )
     test_AddRow< MatrixType >();
 }
 
-TYPED_TEST( MatrixTest, vectorProductTest )
+/*TYPED_TEST( MatrixTest, vectorProductTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
     
     test_VectorProduct< MatrixType >();
-}
+}*/
 
 TYPED_TEST( MatrixTest, addMatrixTest )
 {
@@ -1499,6 +1578,6 @@ TEST( DenseMatrixTest, Dense_performSORIterationTest_Cuda )
 }
 #endif
 
-#endif
+#endif // HAVE_GTEST
 
 #include "../main.h"
-- 
GitLab


From 7cb2f2d4f1321c96439265699c44d91ae2bb1981 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 3 Jan 2020 21:51:52 +0100
Subject: [PATCH 063/179] Restoring setCompressedRowLengthsTest unit test for
 sparse matrices.

---
 src/UnitTests/Matrices/SparseMatrixTest.hpp   | 15 ++++-----------
 .../Matrices/SparseMatrixTest_CSR_segments.h  | 19 ++++++-------------
 .../SparseMatrixTest_SlicedEllpack_segments.h | 19 ++++++-------------
 3 files changed, 16 insertions(+), 37 deletions(-)

diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp
index df06d28fc..bf261aa84 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp
@@ -113,17 +113,10 @@ void test_SetCompressedRowLengths()
     for( IndexType i = 0; i < 8; i++ )      // 9th row
         m.setElement( 9, i, value++ );
 
-
-    EXPECT_EQ( m.getNonZeroRowLength( 0 ), 3 );
-    EXPECT_EQ( m.getNonZeroRowLength( 1 ), 3 );
-    EXPECT_EQ( m.getNonZeroRowLength( 2 ), 1 );
-    EXPECT_EQ( m.getNonZeroRowLength( 3 ), 2 );
-    EXPECT_EQ( m.getNonZeroRowLength( 4 ), 3 );
-    EXPECT_EQ( m.getNonZeroRowLength( 5 ), 4 );
-    EXPECT_EQ( m.getNonZeroRowLength( 6 ), 5 );
-    EXPECT_EQ( m.getNonZeroRowLength( 7 ), 6 );
-    EXPECT_EQ( m.getNonZeroRowLength( 8 ), 7 );
-    EXPECT_EQ( m.getNonZeroRowLength( 9 ), 8 );
+   rowLengths = 0;
+   m.getCompressedRowLengths( rowLengths );
+   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 };
+   EXPECT_EQ( rowLengths, correctRowLengths );
 }
 
 template< typename Matrix1, typename Matrix2 >
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
index 34f7b4f70..5ac3dde26 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
@@ -66,19 +66,12 @@ TYPED_TEST( CSRMatrixTest, setDimensionsTest )
     test_SetDimensions< CSRMatrixType >();
 }
 
-//TYPED_TEST( CSRMatrixTest, setCompressedRowLengthsTest )
-//{
-////    using CSRMatrixType = typename TestFixture::CSRMatrixType;
-//
-////    test_SetCompressedRowLengths< CSRMatrixType >();
-//
-//    bool testRan = false;
-//    EXPECT_TRUE( testRan );
-//    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
-//    std::cout << "      This test is dependent on the input format. \n";
-//    std::cout << "      Almost every format allocates elements per row differently.\n\n";
-//    std::cout << "\n    TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n";
-//}
+TYPED_TEST( CSRMatrixTest, setCompressedRowLengthsTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SetCompressedRowLengths< CSRMatrixType >();
+}
 
 TYPED_TEST( CSRMatrixTest, setLikeTest )
 {
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
index 8eba34a2b..de5356f3a 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
@@ -78,19 +78,12 @@ TYPED_TEST( SlicedEllpackMatrixTest, setDimensionsTest )
     test_SetDimensions< SlicedEllpackMatrixType >();
 }
 
-//TYPED_TEST( SlicedEllpackMatrixTest, setCompressedRowLengthsTest )
-//{
-////    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-//
-////    test_SetCompressedRowLengths< SlicedEllpackMatrixType >();
-//
-//    bool testRan = false;
-//    EXPECT_TRUE( testRan );
-//    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
-//    std::cout << "      This test is dependent on the input format. \n";
-//    std::cout << "      Almost every format allocates elements per row differently.\n\n";
-//    std::cout << "\n    TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n";
-//}
+TYPED_TEST( SlicedEllpackMatrixTest, setCompressedRowLengthsTest )
+{
+   using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+   test_SetCompressedRowLengths< SlicedEllpackMatrixType >();
+}
 
 TYPED_TEST( SlicedEllpackMatrixTest, setLikeTest )
 {
-- 
GitLab


From 5043ac64c3847dc5eb1a8e00f54e5dcf52bd403d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 3 Jan 2020 22:03:56 +0100
Subject: [PATCH 064/179] Restoring dense matrix unit tests.

---
 src/UnitTests/Matrices/CMakeLists.txt    | 3 +--
 src/UnitTests/Matrices/DenseMatrixTest.h | 3 ++-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index 668e272df..e4616f23b 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -55,8 +55,7 @@ ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${C
 ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 # TODO: Uncomment the following when AdEllpack works
 #ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} )
-# TODO: DenseMatrixTest is not finished
-#ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 
 ####
 # Segments tests
diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index 2184e6360..c7ada1240 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -585,6 +585,7 @@ void test_SetRow()
       for( IndexType j = 0; j < cols; j++ )
          m->setElement( i, j, value++ );
 
+   // TODO: replace this with dense matrix view
    Matrix* m_ptr = &m.template modifyData< DeviceType >();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
       RealType values[ 3 ][ 5 ] {
@@ -597,7 +598,7 @@ void test_SetRow()
          { 2, 3, 4, 5, 6 } };
       auto row = m_ptr->getRow( rowIdx );
       for( IndexType i = 0; i < 5; i++ )
-         row.setElement( columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] );
+      /   row.setElement( rowIdx, i ); //columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] );
    };
    TNL::Pointers::synchronizeSmartPointersOnDevice< DeviceType >();
    TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f );
-- 
GitLab


From 7049c029b5218291c7cca95ad9b5d93fa44745e9 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Sat, 4 Jan 2020 22:10:15 +0100
Subject: [PATCH 065/179] Added DenseMatrixView.

---
 src/TNL/Matrices/Dense.h                 |    7 +
 src/TNL/Matrices/Dense.hpp               |   34 +-
 src/TNL/Matrices/DenseMatrixView.h       |  207 +++++
 src/TNL/Matrices/DenseMatrixView.hpp     | 1068 ++++++++++++++++++++++
 src/UnitTests/Matrices/DenseMatrixTest.h |    4 +-
 5 files changed, 1317 insertions(+), 3 deletions(-)
 create mode 100644 src/TNL/Matrices/DenseMatrixView.h
 create mode 100644 src/TNL/Matrices/DenseMatrixView.hpp

diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index 51308280d..18249a7b1 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -14,6 +14,7 @@
 #include <TNL/Devices/Host.h>
 #include <TNL/Matrices/DenseMatrixRowView.h>
 #include <TNL/Matrices/Matrix.h>
+#include <TNL/Matrices/DenseMatrixView.h>
 #include <TNL/Containers/Segments/Ellpack.h>
 
 namespace TNL {
@@ -47,6 +48,8 @@ class Dense : public Matrix< Real, Device, Index >
       using ValuesViewType = typename ValuesType::ViewType;
       using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >;
       using SegmentViewType = typename SegmentsType::SegmentViewType;
+      using ViewType = DenseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >;
+      using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
       using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >;
 
       // TODO: remove this
@@ -61,6 +64,10 @@ class Dense : public Matrix< Real, Device, Index >
       Dense();
 
       Dense( const IndexType rows, const IndexType columns );
+      
+      ViewType getView();
+
+      ConstViewType getConstView() const;
 
       static String getSerializationType();
 
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index ebf2c03b9..85f1b560d 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          Dense_impl.h  -  description
+                          Dense.hpp  -  description
                              -------------------
     begin                : Nov 29, 2013
     copyright            : (C) 2013 by Tomas Oberhuber
@@ -37,6 +37,38 @@ Dense( const IndexType rows, const IndexType columns )
    this->setDimensions( rows, columns );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator >
+auto
+Dense< Real, Device, Index, RowMajorOrder, Segments, RealAllocator >::
+getView() -> ViewType
+{
+   return ViewType( this->getRows(), 
+                    this->getColumns(),
+                    this->getValues().getView(),
+                    this->segments.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator >
+auto
+Dense< Real, Device, Index, RowMajorOrder, Segments, RealAllocator >::
+getConstView() const -> ConstViewType
+{
+   return ConstViewType( this->getRows(),
+                         this->getColumns(),
+                         this->getValues().getConstView(),
+                         this->segments.getConstView() );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h
new file mode 100644
index 000000000..2334eb636
--- /dev/null
+++ b/src/TNL/Matrices/DenseMatrixView.h
@@ -0,0 +1,207 @@
+/***************************************************************************
+                          DenseMatrixView.h  -  description
+                             -------------------
+    begin                : Nov 29, 2013
+    copyright            : (C) 2013 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Allocators/Default.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Matrices/DenseMatrixRowView.h>
+#include <TNL/Matrices/MatrixView.h>
+#include <TNL/Containers/Segments/EllpackView.h>
+
+namespace TNL {
+namespace Matrices {
+
+//template< typename Device >
+//class DenseDeviceDependentCode;
+
+template< typename Real = double,
+          typename Device = Devices::Host,
+          typename Index = int,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value >
+class DenseMatrixView : public MatrixView< Real, Device, Index >
+{
+   private:
+      // convenient template alias for controlling the selection of copy-assignment operator
+      template< typename Device2 >
+      using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
+
+      // friend class will be needed for templated assignment operators
+      //template< typename Real2, typename Device2, typename Index2 >
+      //friend class Dense;
+
+   public:
+      using RealType = Real;
+      using DeviceType = Device;
+      using IndexType = Index;
+      using BaseType = Matrix< Real, Device, Index >;
+      using ValuesType = typename BaseType::ValuesVector;
+      using ValuesViewType = typename ValuesType::ViewType;
+      using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >;
+      using SegmentsViewType = typename SegmentsType::ViewType;
+      using SegmentViewType = typename SegmentsType::SegmentViewType;
+      using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >;
+
+      // TODO: remove this
+      using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector;
+      using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView;
+
+      template< typename _Real = Real,
+                typename _Device = Device,
+                typename _Index = Index >
+      using Self = Dense< _Real, _Device, _Index >;
+
+      __cuda_callable__
+      DenseMatrixView();
+
+      __cuda_callable__
+      DenseMatrixView( const IndexType rows,
+                       const IndexType columns,
+                       const ValuesViewType& values,
+                       const SegmentsViewType& segments );
+
+      __cuda_callable__
+      DenseMatrixView( const DenseMatrixView& m ) = default;
+
+      __cuda_callable__
+      ViewType getView();
+
+      __cuda_callable__
+      ConstViewType getConstView() const;
+
+      static String getSerializationType();
+
+      virtual String getSerializationTypeVirtual() const;
+
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
+
+      [[deprecated]]
+      IndexType getRowLength( const IndexType row ) const;
+
+      IndexType getMaxRowLength() const;
+
+      IndexType getNumberOfMatrixElements() const;
+
+      IndexType getNumberOfNonzeroMatrixElements() const;
+
+      void reset();
+
+      __cuda_callable__
+      const RowView getRow( const IndexType& rowIdx ) const;
+
+      __cuda_callable__
+      RowView getRow( const IndexType& rowIdx );
+
+
+      void setValue( const RealType& v );
+
+      __cuda_callable__
+      Real& operator()( const IndexType row,
+                        const IndexType column );
+
+      __cuda_callable__
+      const Real& operator()( const IndexType row,
+                              const IndexType column ) const;
+
+      bool setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
+
+      bool addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator = 1.0 );
+
+      Real getElement( const IndexType row,
+                       const IndexType column ) const;
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
+
+      template< typename Function >
+      void forAllRows( Function& function ) const;
+
+      template< typename Function >
+      void forAllRows( Function& function );
+
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
+
+      template< typename InVector, typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector ) const;
+
+      template< typename Matrix >
+      void addMatrix( const Matrix& matrix,
+                      const RealType& matrixMultiplicator = 1.0,
+                      const RealType& thisMatrixMultiplicator = 1.0 );
+
+      template< typename Matrix1, typename Matrix2, int tileDim = 32 >
+      void getMatrixProduct( const Matrix1& matrix1,
+                          const Matrix2& matrix2,
+                          const RealType& matrix1Multiplicator = 1.0,
+                          const RealType& matrix2Multiplicator = 1.0 );
+
+      template< typename Matrix, int tileDim = 32 >
+      void getTransposition( const Matrix& matrix,
+                             const RealType& matrixMultiplicator = 1.0 );
+
+      template< typename Vector1, typename Vector2 >
+      void performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
+
+      // copy assignment
+      Dense& operator=( const Dense& matrix );
+
+      // cross-device copy assignment
+      template< typename Real2, typename Device2, typename Index2,
+                typename = typename Enabler< Device2 >::type >
+      Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix );
+
+      void save( const String& fileName ) const;
+
+      void load( const String& fileName );
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+      void print( std::ostream& str ) const;
+
+   protected:
+
+      __cuda_callable__
+      IndexType getElementIndex( const IndexType row,
+                                 const IndexType column ) const;
+
+      typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode;
+      friend class DenseDeviceDependentCode< DeviceType >;
+
+      SegmentsViewType segments;
+};
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/DenseMatrixView.hpp>
diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp
new file mode 100644
index 000000000..18d6574ac
--- /dev/null
+++ b/src/TNL/Matrices/DenseMatrixView.hpp
@@ -0,0 +1,1068 @@
+/***************************************************************************
+                          DenseMatrixView.hpp  -  description
+                             -------------------
+    begin                : Nov 29, 2013
+    copyright            : (C) 2013 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Assert.h>
+#include <TNL/Matrices/Dense.h>
+#include <TNL/Exceptions/NotImplementedError.h>
+
+namespace TNL {
+namespace Matrices {   
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+DenseMatrixView()
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+DenseMatrixView( const IndexType rows,
+                  const IndexType columns,
+                  const ValuesViewType& values,
+                  const ColumnsIndexesViewType& columnIndexes,
+                  const SegmentsViewType& segments )
+ : MatrixView< Real, Device, Index >( rows, columns, values ), columnIndexes( columnIndexes ), segments( segments )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+auto
+DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getView() -> ViewType
+{
+   return ViewType( this->getRows(), 
+                    this->getColumns(),
+                    this->getValues().getView(),
+                    this->columnIndexes.getView(),
+                    this->segments.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+auto
+DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getConstView() const -> ConstViewType
+{
+   return ConstViewType( this->getRows(),
+                         this->getColumns(),
+                         this->getValues().getConstView(),
+                         this->getColumnsIndexes().getConstView(),
+                         this->segments.getConstView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+String
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+getSerializationType()
+{
+   return String( "Matrices::Dense< " ) +
+          getType< RealType >() + ", " +
+          getType< Device >() + ", " +
+          getType< IndexType >() + " >";
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+String
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+getSerializationTypeVirtual() const
+{
+   return this->getSerializationType();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Vector >
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+getCompressedRowLengths( Vector& rowLengths ) const
+{
+   rowLengths.setSize( this->getRows() );
+   rowLengths = 0;
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   this->allRowsReduction( fetch, reduce, keep, 0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getRowLength( const IndexType row ) const
+{
+   return this->getColumns();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMaxRowLength() const
+{
+   return this->getColumns();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfMatrixElements() const
+{
+   return this->getRows() * this->getColumns();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfNonzeroMatrixElements() const
+{
+   const auto values_view = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+      return ( values_view[ i ] != 0.0 );
+   };
+   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::reset()
+{
+   Matrix< Real, Device, Index >::reset();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::setValue( const Real& value )
+{
+   this->values = value;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__ auto
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+getRow( const IndexType& rowIdx ) const -> const RowView
+{
+   TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." );
+   return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__ auto
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+getRow( const IndexType& rowIdx ) -> RowView
+{
+   TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." );
+   return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__
+Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const IndexType row,
+                                                const IndexType column )
+{
+   TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
+   TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." );
+   TNL_ASSERT_GE( column, 0, "Column index must be non-negative." );
+   TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." );
+
+   return this->values.operator[]( this->getElementIndex( row, column ) );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__
+const Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const IndexType row,
+                                                      const IndexType column ) const
+{
+   TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
+   TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." );
+   TNL_ASSERT_GE( column, 0, "Column index must be non-negative." );
+   TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." );
+
+   return this->values.operator[]( this->getElementIndex( row, column ) );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::setElement( const IndexType row,
+                                               const IndexType column,
+                                               const RealType& value )
+{
+   this->values.setElement( this->getElementIndex( row, column ), value );
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::addElement( const IndexType row,
+                                                        const IndexType column,
+                                                        const RealType& value,
+                                                        const RealType& thisElementMultiplicator )
+{
+   const IndexType elementIndex = this->getElementIndex( row, column );
+   if( thisElementMultiplicator == 1.0 )
+      this->values.setElement( elementIndex,
+                               this->values.getElement( elementIndex ) + value );
+   else
+      this->values.setElement( elementIndex,
+                               thisElementMultiplicator * this->values.getElement( elementIndex ) + value );
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Real 
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+getElement( const IndexType row,
+            const IndexType column ) const
+{
+   return this->values.getElement( this->getElementIndex( row, column ) );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const
+{
+   const auto values_view = this->values.getConstView();
+   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
+         return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
+      return zero;
+   };
+   this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+forRows( IndexType first, IndexType last, Function& function ) const
+{
+   const auto values_view = this->values.getConstView();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool {
+      function( rowIdx, columnIdx, values_view[ globalIdx ] );
+      return true;
+   };
+   this->segments.forSegments( first, last, f );
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+forRows( IndexType first, IndexType last, Function& function )
+{
+   auto values_view = this->values.getView();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool {
+      function( rowIdx, columnIdx, values_view[ globalIdx ] );
+      return true;
+   };
+   this->segments.forSegments( first, last, f );
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+forAllRows( Function& function ) const
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+forAllRows( Function& function )
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Vector >
+__cuda_callable__
+typename Vector::RealType DenseMatrixView< Real, Device, Index, RowMajorOrder >::rowVectorProduct( const IndexType row,
+                                                                                   const Vector& vector ) const
+{
+   RealType sum( 0.0 );
+   // TODO: Fix this
+   //for( IndexType column = 0; column < this->getColumns(); column++ )
+   //   sum += this->getElementFast( row, column ) * vector[ column ];
+   return sum;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename InVector,
+             typename OutVector >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::vectorProduct( const InVector& inVector,
+                                                           OutVector& outVector ) const
+{
+   TNL_ASSERT( this->getColumns() == inVector.getSize(),
+            std::cerr << "Matrix columns: " << this->getColumns() << std::endl
+                 << "Vector size: " << inVector.getSize() << std::endl );
+   TNL_ASSERT( this->getRows() == outVector.getSize(),
+               std::cerr << "Matrix rows: " << this->getRows() << std::endl
+                    << "Vector size: " << outVector.getSize() << std::endl );
+
+   DeviceDependentCode::vectorProduct( *this, inVector, outVector );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Matrix >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::addMatrix( const Matrix& matrix,
+                                              const RealType& matrixMultiplicator,
+                                              const RealType& thisMatrixMultiplicator )
+{
+   TNL_ASSERT( this->getColumns() == matrix.getColumns() &&
+              this->getRows() == matrix.getRows(),
+            std::cerr << "This matrix columns: " << this->getColumns() << std::endl
+                 << "This matrix rows: " << this->getRows() << std::endl
+                 << "That matrix columns: " << matrix.getColumns() << std::endl
+                 << "That matrix rows: " << matrix.getRows() << std::endl );
+
+   if( thisMatrixMultiplicator == 1.0 )
+      this->values += matrixMultiplicator * matrix.values;
+   else
+      this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values;
+}
+
+#ifdef HAVE_CUDA
+template< typename Real,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename Matrix1,
+          typename Matrix2,
+          int tileDim,
+          int tileRowBlockSize >
+__global__ void DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix,
+                                                   const Matrix1* matrixA,
+                                                   const Matrix2* matrixB,
+                                                   const Real matrixAMultiplicator,
+                                                   const Real matrixBMultiplicator,
+                                                   const Index gridIdx_x,
+                                                   const Index gridIdx_y )
+{
+   /****
+    * Here we compute product C = A * B. To profit from the fast
+    * shared memory we do it by tiles.
+    */
+
+   typedef Index IndexType;
+   typedef Real RealType;
+   __shared__ Real tileA[ tileDim*tileDim ];
+   __shared__ Real tileB[ tileDim*tileDim ];
+   __shared__ Real tileC[ tileDim*tileDim ];
+
+   const IndexType& matrixARows = matrixA->getRows();
+   const IndexType& matrixAColumns = matrixA->getColumns();
+   const IndexType& matrixBRows = matrixB->getRows();
+   const IndexType& matrixBColumns = matrixB->getColumns();
+
+   /****
+    * Reset the tile C
+    */
+   for( IndexType row = 0; row < tileDim; row += tileRowBlockSize )
+      tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] = 0.0;
+
+   /****
+    * Compute the result tile coordinates
+    */
+   const IndexType resultTileRow = ( gridIdx_y*gridDim.y + blockIdx.y )*tileDim;
+   const IndexType resultTileColumn = ( gridIdx_x*gridDim.x + blockIdx.x )*tileDim;
+
+   /****
+    * Sum over the matrix tiles
+    */
+   for( IndexType i = 0; i < matrixAColumns; i += tileDim )
+   {
+      for( IndexType row = 0; row < tileDim; row += tileRowBlockSize )
+      {
+         const IndexType matrixARow = resultTileRow + threadIdx.y + row;
+         const IndexType matrixAColumn = i + threadIdx.x;
+         if( matrixARow < matrixARows && matrixAColumn < matrixAColumns )
+            tileA[ (threadIdx.y + row)*tileDim + threadIdx.x ] =
+               matrixAMultiplicator * matrixA->getElementFast( matrixARow,  matrixAColumn );
+
+         const IndexType matrixBRow = i + threadIdx.y + row;
+         const IndexType matrixBColumn = resultTileColumn + threadIdx.x;
+         if( matrixBRow < matrixBRows && matrixBColumn < matrixBColumns )
+            tileB[ (threadIdx.y + row)*tileDim + threadIdx.x ] =
+               matrixBMultiplicator * matrixB->getElementFast( matrixBRow, matrixBColumn );
+      }
+      __syncthreads();
+
+      const IndexType tileALastRow = tnlCudaMin( tileDim, matrixARows - resultTileRow );
+      const IndexType tileALastColumn = tnlCudaMin( tileDim, matrixAColumns - i );
+      const IndexType tileBLastRow = tnlCudaMin( tileDim, matrixBRows - i );
+      const IndexType tileBLastColumn =
+         tnlCudaMin( tileDim, matrixBColumns - resultTileColumn );
+
+      for( IndexType row = 0; row < tileALastRow; row += tileRowBlockSize )
+      {
+         RealType sum( 0.0 );
+         for( IndexType j = 0; j < tileALastColumn; j++ )
+            sum += tileA[ ( threadIdx.y + row )*tileDim + j ]*
+                      tileB[ j*tileDim + threadIdx.x ];
+         tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] += sum;
+      }
+      __syncthreads();
+   }
+
+   /****
+    * Write the result tile to the result matrix
+    */
+   const IndexType& matrixCRows = resultMatrix->getRows();
+   const IndexType& matrixCColumns = resultMatrix->getColumns();
+   for( IndexType row = 0; row < tileDim; row += tileRowBlockSize )
+   {
+      const IndexType matrixCRow = resultTileRow + row + threadIdx.y;
+      const IndexType matrixCColumn = resultTileColumn + threadIdx.x;
+      if( matrixCRow < matrixCRows && matrixCColumn < matrixCColumns )
+         resultMatrix->setElementFast( matrixCRow,
+                                       matrixCColumn,
+                                       tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] );
+   }
+
+}
+#endif
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Matrix1, typename Matrix2, int tileDim >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMatrixProduct( const Matrix1& matrix1,
+                                                              const Matrix2& matrix2,
+                                                              const RealType& matrix1Multiplicator,
+                                                              const RealType& matrix2Multiplicator )
+{
+   TNL_ASSERT( matrix1.getColumns() == matrix2.getRows() &&
+              this->getRows() == matrix1.getRows() &&
+              this->getColumns() == matrix2.getColumns(),
+            std::cerr << "This matrix columns: " << this->getColumns() << std::endl
+                 << "This matrix rows: " << this->getRows() << std::endl
+                 << "Matrix1 columns: " << matrix1.getColumns() << std::endl
+                 << "Matrix1 rows: " << matrix1.getRows() << std::endl
+                 << "Matrix2 columns: " << matrix2.getColumns() << std::endl
+                 << "Matrix2 rows: " << matrix2.getRows() << std::endl );
+
+   if( std::is_same< Device, Devices::Host >::value )
+      for( IndexType i = 0; i < this->getRows(); i += tileDim )
+         for( IndexType j = 0; j < this->getColumns(); j += tileDim )
+         {
+            const IndexType tileRows = min( tileDim, this->getRows() - i );
+            const IndexType tileColumns = min( tileDim, this->getColumns() - j );
+            for( IndexType i1 = i; i1 < i + tileRows; i1++ )
+               for( IndexType j1 = j; j1 < j + tileColumns; j1++ )
+                  this->setElementFast( i1, j1, 0.0 );
+
+            for( IndexType k = 0; k < matrix1.getColumns(); k += tileDim )
+            {
+               const IndexType lastK = min( k + tileDim, matrix1.getColumns() );
+               for( IndexType i1 = 0; i1 < tileRows; i1++ )
+                  for( IndexType j1 = 0; j1 < tileColumns; j1++ )
+                     for( IndexType k1 = k; k1 < lastK; k1++ )
+                        this->addElementFast( i + i1, j + j1,
+                            matrix1.getElementFast( i + i1, k1 ) * matrix2.getElementFast( k1, j + j1 ) );
+            }
+         }
+   if( std::is_same< Device, Devices::Cuda >::value )
+   {
+#ifdef HAVE_CUDA
+      dim3 cudaBlockSize( 0 ), cudaGridSize( 0 );
+      const IndexType matrixProductCudaBlockSize( 256 );
+      const IndexType rowTiles = roundUpDivision( this->getRows(), tileDim );
+      const IndexType columnTiles = roundUpDivision( this->getColumns(), tileDim );
+      const IndexType cudaBlockColumns( tileDim );
+      const IndexType cudaBlockRows( matrixProductCudaBlockSize / tileDim );
+      cudaBlockSize.x = cudaBlockColumns;
+      cudaBlockSize.y = cudaBlockRows;
+      const IndexType rowGrids = roundUpDivision( rowTiles, Cuda::getMaxGridSize() );
+      const IndexType columnGrids = roundUpDivision( columnTiles, Cuda::getMaxGridSize() );
+
+      for( IndexType gridIdx_x = 0; gridIdx_x < columnGrids; gridIdx_x++ )
+         for( IndexType gridIdx_y = 0; gridIdx_y < rowGrids; gridIdx_y++ )
+         {
+            cudaGridSize.x = cudaGridSize.y = Cuda::getMaxGridSize();
+            if( gridIdx_x == columnGrids - 1 )
+               cudaGridSize.x = columnTiles % Cuda::getMaxGridSize();
+            if( gridIdx_y == rowGrids - 1 )
+               cudaGridSize.y = rowTiles % Cuda::getMaxGridSize();
+            Dense* this_kernel = Cuda::passToDevice( *this );
+            Matrix1* matrix1_kernel = Cuda::passToDevice( matrix1 );
+            Matrix2* matrix2_kernel = Cuda::passToDevice( matrix2 );
+            DenseMatrixProductKernel< Real,
+                                               Index,
+                                               Matrix1,
+                                               Matrix2,
+                                               tileDim,
+                                               cudaBlockRows >
+                                           <<< cudaGridSize,
+                                               cudaBlockSize,
+                                               3*tileDim*tileDim >>>
+                                             ( this_kernel,
+                                               matrix1_kernel,
+                                               matrix2_kernel,
+                                               matrix1Multiplicator,
+                                               matrix2Multiplicator,
+                                               gridIdx_x,
+                                               gridIdx_y );
+            Cuda::freeFromDevice( this_kernel );
+            Cuda::freeFromDevice( matrix1_kernel );
+            Cuda::freeFromDevice( matrix2_kernel );
+         }
+#endif
+   }
+}
+
+#ifdef HAVE_CUDA
+template< typename Real,
+          typename Index,
+          typename Matrix,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          int tileDim,
+          int tileRowBlockSize >
+__global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix,
+                                                          const Matrix* inputMatrix,
+                                                          const Real matrixMultiplicator,
+                                                          const Index gridIdx_x,
+                                                          const Index gridIdx_y )
+{
+   __shared__ Real tile[ tileDim*tileDim ];
+
+   const Index columns = inputMatrix->getColumns();
+   const Index rows = inputMatrix->getRows();
+
+
+   /****
+    * Diagonal mapping of the CUDA blocks
+    */
+   Index blockIdx_x, blockIdx_y;
+   if( columns == rows )
+   {
+      blockIdx_y = blockIdx.x;
+      blockIdx_x = (blockIdx.x+blockIdx.y)%gridDim.x;
+   }
+   else
+   {
+      Index bID = blockIdx.x + gridDim.x*blockIdx.y;
+      blockIdx_y = bID % gridDim.y;
+      blockIdx_x = ( ( bID / gridDim.y ) + blockIdx_y ) % gridDim.x;
+   }
+
+   /****
+    * Read the tile to the shared memory
+    */
+   const Index readRowPosition =
+      ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.y;
+   const Index readColumnPosition =
+      ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.x;
+   for( Index rowBlock = 0;
+        rowBlock < tileDim;
+        rowBlock += tileRowBlockSize )
+   {
+      tile[ Cuda::getInterleaving( threadIdx.x*tileDim +  threadIdx.y + rowBlock ) ] =
+               inputMatrix->getElementFast( readColumnPosition,
+                                            readRowPosition + rowBlock );
+   }
+   __syncthreads();
+
+   /****
+    * Write the tile to the global memory
+    */
+   const Index writeRowPosition =
+      ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.y;
+   const Index writeColumnPosition =
+      ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.x;
+   for( Index rowBlock = 0;
+        rowBlock < tileDim;
+        rowBlock += tileRowBlockSize )
+   {
+      resultMatrix->setElementFast( writeColumnPosition,
+                                    writeRowPosition + rowBlock,
+                                    matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] );
+
+   }
+
+}
+
+template< typename Real,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename Matrix,
+          int tileDim,
+          int tileRowBlockSize >
+__global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix,
+                                                             const Matrix* inputMatrix,
+                                                             const Real matrixMultiplicator,
+                                                             const Index gridIdx_x,
+                                                             const Index gridIdx_y )
+{
+   __shared__ Real tile[ tileDim*tileDim ];
+
+   const Index columns = inputMatrix->getColumns();
+   const Index rows = inputMatrix->getRows();
+
+   /****
+    * Diagonal mapping of the CUDA blocks
+    */
+   Index blockIdx_x, blockIdx_y;
+   if( columns == rows )
+   {
+      blockIdx_y = blockIdx.x;
+      blockIdx_x = (blockIdx.x+blockIdx.y)%gridDim.x;
+   }
+   else
+   {
+      Index bID = blockIdx.x + gridDim.x*blockIdx.y;
+      blockIdx_y = bID % gridDim.y;
+      blockIdx_x = ( ( bID / gridDim.y ) + blockIdx_y ) % gridDim.x;
+   }
+
+   /****
+    * Read the tile to the shared memory
+    */
+   const Index readRowPosition =
+      ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.y;
+   const Index readColumnPosition =
+      ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.x;
+   if( readColumnPosition < columns )
+   {
+      const Index readOffset = readRowPosition * columns + readColumnPosition;
+      for( Index rowBlock = 0;
+           rowBlock < tileDim;
+           rowBlock += tileRowBlockSize )
+      {
+         if( readRowPosition + rowBlock < rows )
+            tile[ Cuda::getInterleaving( threadIdx.x*tileDim +  threadIdx.y + rowBlock ) ] =
+               inputMatrix->getElementFast( readColumnPosition,
+                                            readRowPosition + rowBlock );
+      }
+   }
+   __syncthreads();
+
+   /****
+    * Write the tile to the global memory
+    */
+   const Index writeRowPosition =
+      ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.y;
+   const Index writeColumnPosition =
+      ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.x;
+   if( writeColumnPosition < rows )
+   {
+      const Index writeOffset = writeRowPosition * rows + writeColumnPosition;
+      for( Index rowBlock = 0;
+           rowBlock < tileDim;
+           rowBlock += tileRowBlockSize )
+      {
+         if( writeRowPosition + rowBlock < columns )
+            resultMatrix->setElementFast( writeColumnPosition,
+                                          writeRowPosition + rowBlock,
+                                          matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] );
+      }
+   }
+
+}
+
+
+#endif
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Matrix, int tileDim >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( const Matrix& matrix,
+                                                              const RealType& matrixMultiplicator )
+{
+   TNL_ASSERT( this->getColumns() == matrix.getRows() &&
+              this->getRows() == matrix.getColumns(),
+               std::cerr << "This matrix columns: " << this->getColumns() << std::endl
+                    << "This matrix rows: " << this->getRows() << std::endl
+                    << "That matrix columns: " << matrix.getColumns() << std::endl
+                    << "That matrix rows: " << matrix.getRows() << std::endl );
+ 
+   if( std::is_same< Device, Devices::Host >::value )
+   {
+      const IndexType& rows = matrix.getRows();
+      const IndexType& columns = matrix.getColumns();
+      for( IndexType i = 0; i < rows; i += tileDim )
+         for( IndexType j = 0; j < columns; j += tileDim )
+            for( IndexType k = i; k < i + tileDim && k < rows; k++ )
+               for( IndexType l = j; l < j + tileDim && l < columns; l++ )
+                  this->setElement( l, k, matrixMultiplicator * matrix. getElement( k, l ) );
+   }
+   if( std::is_same< Device, Devices::Cuda >::value )
+   {
+#ifdef HAVE_CUDA
+      dim3 cudaBlockSize( 0 ), cudaGridSize( 0 );
+      const IndexType matrixProductCudaBlockSize( 256 );
+      const IndexType rowTiles = roundUpDivision( this->getRows(), tileDim );
+      const IndexType columnTiles = roundUpDivision( this->getColumns(), tileDim );
+      const IndexType cudaBlockColumns( tileDim );
+      const IndexType cudaBlockRows( matrixProductCudaBlockSize / tileDim );
+      cudaBlockSize.x = cudaBlockColumns;
+      cudaBlockSize.y = cudaBlockRows;
+      const IndexType rowGrids = roundUpDivision( rowTiles, Cuda::getMaxGridSize() );
+      const IndexType columnGrids = roundUpDivision( columnTiles, Cuda::getMaxGridSize() );
+      const IndexType sharedMemorySize = tileDim*tileDim + tileDim*tileDim/Cuda::getNumberOfSharedMemoryBanks();
+
+      Dense* this_device = Cuda::passToDevice( *this );
+      Matrix* matrix_device = Cuda::passToDevice( matrix );
+
+      for( IndexType gridIdx_x = 0; gridIdx_x < columnGrids; gridIdx_x++ )
+         for( IndexType gridIdx_y = 0; gridIdx_y < rowGrids; gridIdx_y++ )
+         {
+            cudaGridSize.x = cudaGridSize.y = Cuda::getMaxGridSize();
+            if( gridIdx_x == columnGrids - 1)
+               cudaGridSize.x = columnTiles % Cuda::getMaxGridSize();
+            if( gridIdx_y == rowGrids - 1 )
+               cudaGridSize.y = rowTiles % Cuda::getMaxGridSize();
+            if( ( gridIdx_x < columnGrids - 1 || matrix.getColumns() % tileDim == 0 ) &&
+                ( gridIdx_y < rowGrids - 1 || matrix.getRows() % tileDim == 0 ) )
+            {
+               DenseTranspositionAlignedKernel< Real,
+                                                         Index,
+                                                         Matrix,
+                                                         tileDim,
+                                                         cudaBlockRows >
+                                                     <<< cudaGridSize,
+                                                         cudaBlockSize,
+                                                         sharedMemorySize  >>>
+                                                       ( this_device,
+                                                         matrix_device,
+                                                         matrixMultiplicator,
+                                                         gridIdx_x,
+                                                         gridIdx_y );
+            }
+            else
+            {
+               DenseTranspositionNonAlignedKernel< Real,
+                                                         Index,
+                                                         Matrix,
+                                                         tileDim,
+                                                         cudaBlockRows >
+                                                     <<< cudaGridSize,
+                                                         cudaBlockSize,
+                                                         sharedMemorySize  >>>
+                                                       ( this_device,
+                                                         matrix_device,
+                                                         matrixMultiplicator,
+                                                         gridIdx_x,
+                                                         gridIdx_y );
+            }
+            TNL_CHECK_CUDA_DEVICE;
+         }
+      Cuda::freeFromDevice( this_device );
+      Cuda::freeFromDevice( matrix_device );
+#endif
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Vector1, typename Vector2 >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::performSORIteration( const Vector1& b,
+                                                        const IndexType row,
+                                                        Vector2& x,
+                                                        const RealType& omega ) const
+{
+   RealType sum( 0.0 ), diagonalValue;
+   for( IndexType i = 0; i < this->getColumns(); i++ )
+   {
+      if( i == row )
+         diagonalValue = this->getElement( row, row );
+      else
+         sum += this->getElement( row, i ) * x[ i ];
+   }
+   x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum );
+}
+
+
+// copy assignment
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+DenseMatrixView< Real, Device, Index, RowMajorOrder >&
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const Dense& matrix )
+{
+   this->setLike( matrix );
+   this->values = matrix.values;
+   return *this;
+}
+
+// cross-device copy assignment
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Real2, typename Device2, typename Index2, typename >
+DenseMatrixView< Real, Device, Index, RowMajorOrder >&
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const Dense< Real2, Device2, Index2 >& matrix )
+{
+   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
+                  "unknown device" );
+   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
+                  "unknown device" );
+
+   this->setLike( matrix );
+
+   throw Exceptions::NotImplementedError("Cross-device assignment for the Dense format is not implemented yet.");
+}
+
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( const String& fileName ) const
+{
+   Object::save( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::load( const String& fileName )
+{
+   Object::load( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const
+{
+   Matrix< Real, Device, Index >::save( file );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::load( File& file )
+{
+   Matrix< Real, Device, Index >::load( file );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const
+{
+   for( IndexType row = 0; row < this->getRows(); row++ )
+   {
+      str <<"Row: " << row << " -> ";
+      for( IndexType column = 0; column < this->getColumns(); column++ )
+         str << " Col:" << column << "->" << this->getElement( row, column ) << "\t";
+      str << std::endl;
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__
+Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getElementIndex( const IndexType row,
+                                                              const IndexType column ) const
+{
+   return this->segments.getGlobalIndex( row, column );
+}
+
+template<>
+class DenseDeviceDependentCode< Devices::Host >
+{
+   public:
+
+      typedef Devices::Host Device;
+
+      template< typename Real,
+                typename Index,
+                bool RowMajorOrder,
+                typename RealAllocator,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+#ifdef HAVE_OPENMP
+#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
+#endif
+         for( Index row = 0; row < matrix.getRows(); row ++ )
+            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
+      }
+};
+
+template<>
+class DenseDeviceDependentCode< Devices::Cuda >
+{
+   public:
+
+      typedef Devices::Cuda Device;
+
+      template< typename Real,
+                typename Index,
+                bool RowMajorOrder,
+                typename RealAllocator,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+         MatrixVectorProductCuda( matrix, inVector, outVector );
+      }
+};
+
+} // namespace Matrices
+} // namespace TNL
diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index c7ada1240..c0f9b92ff 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -597,8 +597,8 @@ void test_SetRow()
          { 0, 1, 2, 3, 4 },
          { 2, 3, 4, 5, 6 } };
       auto row = m_ptr->getRow( rowIdx );
-      for( IndexType i = 0; i < 5; i++ )
-      /   row.setElement( rowIdx, i ); //columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] );
+      //for( IndexType i = 0; i < 5; i++ )
+      ///   row.setElement( rowIdx, i ); //columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] );
    };
    TNL::Pointers::synchronizeSmartPointersOnDevice< DeviceType >();
    TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f );
-- 
GitLab


From 16f3e12af5a32bd121e86d13af5bb59c3f5bb0b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sun, 5 Jan 2020 20:57:18 +0100
Subject: [PATCH 066/179] Fixing DenseMatrix unit tests.

---
 src/TNL/Containers/Segments/Ellpack.hpp  |  10 +-
 src/TNL/Matrices/Dense.h                 |   4 +-
 src/TNL/Matrices/Dense.hpp               |  72 ++-----
 src/TNL/Matrices/DenseMatrixView.h       |  18 +-
 src/TNL/Matrices/DenseMatrixView.hpp     | 162 ++++++---------
 src/TNL/Matrices/details/DenseMatrix.h   |  67 +++++++
 src/UnitTests/Matrices/DenseMatrixTest.h | 240 ++++++++++-------------
 7 files changed, 270 insertions(+), 303 deletions(-)
 create mode 100644 src/TNL/Matrices/details/DenseMatrix.h

diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index 97a256c9e..8763c2e5d 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -306,7 +306,7 @@ void
 Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    if( RowMajorOrder )
    {
       const IndexType segmentSize = this->segmentSize;
@@ -315,8 +315,8 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType end = begin + segmentSize;
          RealType aux( zero );
          bool compute( true );
-         for( IndexType j = begin; j < end && compute; j++  )
-            reduction( aux, fetch( i, j, compute, args... ) );
+         for( IndexType j = begin, localIdx = 0; j < end && compute; j++, localIdx++  )
+            reduction( aux, fetch( i, localIdx, j, compute, args... ) );
          keeper( i, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -330,8 +330,8 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType end = storageSize;
          RealType aux( zero );
          bool compute( true );
-         for( IndexType j = begin; j < end && compute; j += alignedSize  )
-            reduction( aux, fetch( i, j, compute, args... ) );
+         for( IndexType j = begin, localIdx = 0; j < end && compute; j += alignedSize, localIdx++  )
+            reduction( aux, fetch( i, localIdx, j, compute, args... ) );
          keeper( i, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index 18249a7b1..90aa57170 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -48,8 +48,8 @@ class Dense : public Matrix< Real, Device, Index >
       using ValuesViewType = typename ValuesType::ViewType;
       using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >;
       using SegmentViewType = typename SegmentsType::SegmentViewType;
-      using ViewType = DenseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >;
-      using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
+      using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >;
+      using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
       using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >;
 
       // TODO: remove this
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 85f1b560d..fe11d6759 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -41,10 +41,9 @@ template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder,
-          template< typename, typename, typename > class Segments,
           typename RealAllocator >
 auto
-Dense< Real, Device, Index, RowMajorOrder, Segments, RealAllocator >::
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getView() -> ViewType
 {
    return ViewType( this->getRows(), 
@@ -57,10 +56,9 @@ template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder,
-          template< typename, typename, typename > class Segments,
           typename RealAllocator >
 auto
-Dense< Real, Device, Index, RowMajorOrder, Segments, RealAllocator >::
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getConstView() const -> ConstViewType
 {
    return ConstViewType( this->getRows(),
@@ -451,8 +449,9 @@ template< typename Real,
           typename RealAllocator >
    template< typename InVector,
              typename OutVector >
-void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct( const InVector& inVector,
-                                                           OutVector& outVector ) const
+void 
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+vectorProduct( const InVector& inVector, OutVector& outVector ) const
 {
    TNL_ASSERT( this->getColumns() == inVector.getSize(),
             std::cerr << "Matrix columns: " << this->getColumns() << std::endl
@@ -461,7 +460,20 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct(
                std::cerr << "Matrix rows: " << this->getRows() << std::endl
                     << "Vector size: " << outVector.getSize() << std::endl );
 
-   DeviceDependentCode::vectorProduct( *this, inVector, outVector );
+   //DeviceDependentCode::vectorProduct( *this, inVector, outVector );
+   const auto inVectorView = inVector.getConstView();
+   auto outVectorView = outVector.getView();
+   const auto valuesView = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType {
+      return valuesView[ offset ] * inVectorView[ column ];
+   };
+   auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+      sum += value;
+   };
+   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      outVectorView[ row ] = value;
+   };
+   this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
 }
 
 template< typename Real,
@@ -1051,51 +1063,5 @@ Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementInde
    return this->segments.getGlobalIndex( row, column );
 }
 
-template<>
-class DenseDeviceDependentCode< Devices::Host >
-{
-   public:
-
-      typedef Devices::Host Device;
-
-      template< typename Real,
-                typename Index,
-                bool RowMajorOrder,
-                typename RealAllocator,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-#ifdef HAVE_OPENMP
-#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
-#endif
-         for( Index row = 0; row < matrix.getRows(); row ++ )
-            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
-      }
-};
-
-template<>
-class DenseDeviceDependentCode< Devices::Cuda >
-{
-   public:
-
-      typedef Devices::Cuda Device;
-
-      template< typename Real,
-                typename Index,
-                bool RowMajorOrder,
-                typename RealAllocator,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-         MatrixVectorProductCuda( matrix, inVector, outVector );
-      }
-};
-
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h
index 2334eb636..23f5d7317 100644
--- a/src/TNL/Matrices/DenseMatrixView.h
+++ b/src/TNL/Matrices/DenseMatrixView.h
@@ -14,14 +14,11 @@
 #include <TNL/Devices/Host.h>
 #include <TNL/Matrices/DenseMatrixRowView.h>
 #include <TNL/Matrices/MatrixView.h>
-#include <TNL/Containers/Segments/EllpackView.h>
+#include <TNL/Containers/Segments/Ellpack.h>
 
 namespace TNL {
 namespace Matrices {
 
-//template< typename Device >
-//class DenseDeviceDependentCode;
-
 template< typename Real = double,
           typename Device = Devices::Host,
           typename Index = int,
@@ -48,6 +45,9 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
       using SegmentsViewType = typename SegmentsType::ViewType;
       using SegmentViewType = typename SegmentsType::SegmentViewType;
       using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >;
+      using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >;
+      using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
+
 
       // TODO: remove this
       using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector;
@@ -56,7 +56,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
       template< typename _Real = Real,
                 typename _Device = Device,
                 typename _Index = Index >
-      using Self = Dense< _Real, _Device, _Index >;
+      using Self = DenseMatrixView< _Real, _Device, _Index >;
 
       __cuda_callable__
       DenseMatrixView();
@@ -172,12 +172,12 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
                                 const RealType& omega = 1.0 ) const;
 
       // copy assignment
-      Dense& operator=( const Dense& matrix );
+      DenseMatrixView& operator=( const DenseMatrixView& matrix );
 
       // cross-device copy assignment
       template< typename Real2, typename Device2, typename Index2,
                 typename = typename Enabler< Device2 >::type >
-      Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix );
+      DenseMatrixView& operator=( const DenseMatrixView< Real2, Device2, Index2 >& matrix );
 
       void save( const String& fileName ) const;
 
@@ -195,8 +195,8 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
       IndexType getElementIndex( const IndexType row,
                                  const IndexType column ) const;
 
-      typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode;
-      friend class DenseDeviceDependentCode< DeviceType >;
+      //typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode;
+      //friend class DenseDeviceDependentCode< DeviceType >;
 
       SegmentsViewType segments;
 };
diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp
index 18d6574ac..08cfab843 100644
--- a/src/TNL/Matrices/DenseMatrixView.hpp
+++ b/src/TNL/Matrices/DenseMatrixView.hpp
@@ -20,10 +20,9 @@ namespace Matrices {
 template< typename Real,
           typename Device,
           typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
+          bool RowMajorOrder >
 __cuda_callable__
-DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 DenseMatrixView()
 {
 }
@@ -31,27 +30,24 @@ DenseMatrixView()
 template< typename Real,
           typename Device,
           typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
+          bool RowMajorOrder >
 __cuda_callable__
-DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 DenseMatrixView( const IndexType rows,
-                  const IndexType columns,
-                  const ValuesViewType& values,
-                  const ColumnsIndexesViewType& columnIndexes,
-                  const SegmentsViewType& segments )
- : MatrixView< Real, Device, Index >( rows, columns, values ), columnIndexes( columnIndexes ), segments( segments )
+                 const IndexType columns,
+                 const ValuesViewType& values,
+                 const SegmentsViewType& segments )
+ : MatrixView< Real, Device, Index >( rows, columns, values ), segments( segments )
 {
 }
 
 template< typename Real,
           typename Device,
           typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
+          bool RowMajorOrder >
 __cuda_callable__
 auto
-DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 getView() -> ViewType
 {
    return ViewType( this->getRows(), 
@@ -64,11 +60,10 @@ getView() -> ViewType
 template< typename Real,
           typename Device,
           typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
+          bool RowMajorOrder >
 __cuda_callable__
 auto
-DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 getConstView() const -> ConstViewType
 {
    return ConstViewType( this->getRows(),
@@ -95,8 +90,7 @@ getSerializationType()
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 String
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 getSerializationTypeVirtual() const
@@ -107,8 +101,7 @@ getSerializationTypeVirtual() const
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Vector >
 void
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
@@ -132,8 +125,7 @@ getCompressedRowLengths( Vector& rowLengths ) const
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getRowLength( const IndexType row ) const
 {
    return this->getColumns();
@@ -142,8 +134,7 @@ Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getRowLength( const
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMaxRowLength() const
 {
    return this->getColumns();
@@ -152,8 +143,7 @@ Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMaxRowLength() c
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfMatrixElements() const
 {
    return this->getRows() * this->getColumns();
@@ -162,8 +152,7 @@ Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfMatrixEl
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfNonzeroMatrixElements() const
 {
    const auto values_view = this->values.getConstView();
@@ -176,8 +165,7 @@ Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfNonzeroM
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::reset()
 {
    Matrix< Real, Device, Index >::reset();
@@ -186,8 +174,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::reset()
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::setValue( const Real& value )
 {
    this->values = value;
@@ -196,8 +183,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::setValue( const Real
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 __cuda_callable__ auto
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 getRow( const IndexType& rowIdx ) const -> const RowView
@@ -209,8 +195,7 @@ getRow( const IndexType& rowIdx ) const -> const RowView
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 __cuda_callable__ auto
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 getRow( const IndexType& rowIdx ) -> RowView
@@ -222,8 +207,7 @@ getRow( const IndexType& rowIdx ) -> RowView
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 __cuda_callable__
 Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const IndexType row,
                                                 const IndexType column )
@@ -239,8 +223,7 @@ Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const I
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 __cuda_callable__
 const Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const IndexType row,
                                                       const IndexType column ) const
@@ -256,8 +239,7 @@ const Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( c
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::setElement( const IndexType row,
                                                const IndexType column,
                                                const RealType& value )
@@ -269,8 +251,7 @@ bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::setElement( const In
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::addElement( const IndexType row,
                                                         const IndexType column,
                                                         const RealType& value,
@@ -289,8 +270,7 @@ bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::addElement( const In
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 Real 
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 getElement( const IndexType row,
@@ -302,8 +282,7 @@ getElement( const IndexType row,
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
 void
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
@@ -320,8 +299,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
@@ -333,8 +311,7 @@ allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zer
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Function >
 void
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
@@ -352,8 +329,7 @@ forRows( IndexType first, IndexType last, Function& function ) const
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Function >
 void
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
@@ -371,8 +347,7 @@ forRows( IndexType first, IndexType last, Function& function )
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Function >
 void
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
@@ -384,8 +359,7 @@ forAllRows( Function& function ) const
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Function >
 void
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
@@ -397,8 +371,7 @@ forAllRows( Function& function )
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Vector >
 __cuda_callable__
 typename Vector::RealType DenseMatrixView< Real, Device, Index, RowMajorOrder >::rowVectorProduct( const IndexType row,
@@ -414,8 +387,7 @@ typename Vector::RealType DenseMatrixView< Real, Device, Index, RowMajorOrder >:
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename InVector,
              typename OutVector >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::vectorProduct( const InVector& inVector,
@@ -428,14 +400,13 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::vectorProduct( const
                std::cerr << "Matrix rows: " << this->getRows() << std::endl
                     << "Vector size: " << outVector.getSize() << std::endl );
 
-   DeviceDependentCode::vectorProduct( *this, inVector, outVector );
+   //DeviceDependentCode::vectorProduct( *this, inVector, outVector );
 }
 
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Matrix >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::addMatrix( const Matrix& matrix,
                                               const RealType& matrixMultiplicator,
@@ -454,7 +425,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::addMatrix( const Mat
       this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values;
 }
 
-#ifdef HAVE_CUDA
+#ifdef HAVE_CUDA_______________
 template< typename Real,
           typename Index,
           bool RowMajorOrder,
@@ -463,7 +434,7 @@ template< typename Real,
           typename Matrix2,
           int tileDim,
           int tileRowBlockSize >
-__global__ void DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix,
+__global__ void DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index, RowMajorOrder >* resultMatrix,
                                                    const Matrix1* matrixA,
                                                    const Matrix2* matrixB,
                                                    const Real matrixAMultiplicator,
@@ -558,8 +529,7 @@ __global__ void DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index >* r
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Matrix1, typename Matrix2, int tileDim >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMatrixProduct( const Matrix1& matrix1,
                                                               const Matrix2& matrix2,
@@ -599,7 +569,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMatrixProduct( co
    if( std::is_same< Device, Devices::Cuda >::value )
    {
 #ifdef HAVE_CUDA
-      dim3 cudaBlockSize( 0 ), cudaGridSize( 0 );
+      /*dim3 cudaBlockSize( 0 ), cudaGridSize( 0 );
       const IndexType matrixProductCudaBlockSize( 256 );
       const IndexType rowTiles = roundUpDivision( this->getRows(), tileDim );
       const IndexType columnTiles = roundUpDivision( this->getColumns(), tileDim );
@@ -640,12 +610,12 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMatrixProduct( co
             Cuda::freeFromDevice( this_kernel );
             Cuda::freeFromDevice( matrix1_kernel );
             Cuda::freeFromDevice( matrix2_kernel );
-         }
+         }*/
 #endif
    }
 }
 
-#ifdef HAVE_CUDA
+#ifdef HAVE_CUDA________________________
 template< typename Real,
           typename Index,
           typename Matrix,
@@ -802,8 +772,7 @@ __global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda,
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Matrix, int tileDim >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( const Matrix& matrix,
                                                               const RealType& matrixMultiplicator )
@@ -828,7 +797,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( co
    if( std::is_same< Device, Devices::Cuda >::value )
    {
 #ifdef HAVE_CUDA
-      dim3 cudaBlockSize( 0 ), cudaGridSize( 0 );
+      /*dim3 cudaBlockSize( 0 ), cudaGridSize( 0 );
       const IndexType matrixProductCudaBlockSize( 256 );
       const IndexType rowTiles = roundUpDivision( this->getRows(), tileDim );
       const IndexType columnTiles = roundUpDivision( this->getColumns(), tileDim );
@@ -887,7 +856,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( co
             TNL_CHECK_CUDA_DEVICE;
          }
       Cuda::freeFromDevice( this_device );
-      Cuda::freeFromDevice( matrix_device );
+      Cuda::freeFromDevice( matrix_device );*/
 #endif
    }
 }
@@ -895,8 +864,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( co
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Vector1, typename Vector2 >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::performSORIteration( const Vector1& b,
                                                         const IndexType row,
@@ -919,10 +887,9 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::performSORIteration(
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 DenseMatrixView< Real, Device, Index, RowMajorOrder >&
-DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const Dense& matrix )
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const DenseMatrixView& matrix )
 {
    this->setLike( matrix );
    this->values = matrix.values;
@@ -933,11 +900,10 @@ DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const Dense& m
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Real2, typename Device2, typename Index2, typename >
 DenseMatrixView< Real, Device, Index, RowMajorOrder >&
-DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const Dense< Real2, Device2, Index2 >& matrix )
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const DenseMatrixView< Real2, Device2, Index2 >& matrix )
 {
    static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
                   "unknown device" );
@@ -953,8 +919,7 @@ DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const Dense< R
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( const String& fileName ) const
 {
    Object::save( fileName );
@@ -963,8 +928,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( const String&
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::load( const String& fileName )
 {
    Object::load( fileName );
@@ -973,28 +937,25 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::load( const String&
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const
 {
-   Matrix< Real, Device, Index >::save( file );
+   MatrixView< Real, Device, Index >::save( file );
 }
 
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::load( File& file )
 {
-   Matrix< Real, Device, Index >::load( file );
+   MatrixView< Real, Device, Index >::load( file );
 }
 
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const
 {
    for( IndexType row = 0; row < this->getRows(); row++ )
@@ -1009,8 +970,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream&
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 __cuda_callable__
 Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getElementIndex( const IndexType row,
                                                               const IndexType column ) const
@@ -1018,7 +978,7 @@ Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getElementIndex( co
    return this->segments.getGlobalIndex( row, column );
 }
 
-template<>
+/*template<>
 class DenseDeviceDependentCode< Devices::Host >
 {
    public:
@@ -1062,7 +1022,7 @@ class DenseDeviceDependentCode< Devices::Cuda >
       {
          MatrixVectorProductCuda( matrix, inVector, outVector );
       }
-};
+};*/
 
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/details/DenseMatrix.h b/src/TNL/Matrices/details/DenseMatrix.h
new file mode 100644
index 000000000..813e58bc4
--- /dev/null
+++ b/src/TNL/Matrices/details/DenseMatrix.h
@@ -0,0 +1,67 @@
+/***************************************************************************
+                          DenseMatrix.h  -  description
+                             -------------------
+    begin                : Jan 5, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Matrices {
+      namespace details {
+
+template< typename Device >
+class DenseDeviceDependentCode;
+template<>
+class DenseDeviceDependentCode< Devices::Host >
+{
+   public:
+
+      typedef Devices::Host Device;
+
+      template< typename Real,
+                typename Index,
+                bool RowMajorOrder,
+                typename RealAllocator,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+#ifdef HAVE_OPENMP
+#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
+#endif
+         for( Index row = 0; row < matrix.getRows(); row ++ )
+            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
+      }
+};
+
+template<>
+class DenseDeviceDependentCode< Devices::Cuda >
+{
+   public:
+
+      typedef Devices::Cuda Device;
+
+      template< typename Real,
+                typename Index,
+                bool RowMajorOrder,
+                typename RealAllocator,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+         MatrixVectorProductCuda( matrix, inVector, outVector );
+      }
+};
+
+      } //namespace details
+   } //namepsace Matrices
+} //namespace TNL
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index c0f9b92ff..897861f7f 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -12,8 +12,6 @@
 #include <TNL/Matrices/Matrix.h>
 #include <TNL/Matrices/Dense.h>
 #include <TNL/Containers/Array.h>
-#include <TNL/Pointers/SharedPointer.h>
-#include <TNL/Pointers/SmartPointersRegister.h>
 
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/VectorView.h>
@@ -576,17 +574,16 @@ void test_SetRow()
    const IndexType rows = 3;
    const IndexType cols = 7;
 
-   TNL::Pointers::SharedPointer< Matrix > m;
-   m->reset();
-   m->setDimensions( rows, cols );
+   Matrix m;
+   m.reset();
+   m.setDimensions( rows, cols );
 
    RealType value = 1;
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < cols; j++ )
-         m->setElement( i, j, value++ );
+         m.setElement( i, j, value++ );
 
-   // TODO: replace this with dense matrix view
-   Matrix* m_ptr = &m.template modifyData< DeviceType >();
+   auto matrix_view = m.getView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
       RealType values[ 3 ][ 5 ] {
          { 11, 11, 11, 11, 11 },
@@ -596,36 +593,35 @@ void test_SetRow()
          { 0, 1, 2, 3, 4 },
          { 0, 1, 2, 3, 4 },
          { 2, 3, 4, 5, 6 } };
-      auto row = m_ptr->getRow( rowIdx );
-      //for( IndexType i = 0; i < 5; i++ )
-      ///   row.setElement( rowIdx, i ); //columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] );
+      auto row = matrix_view.getRow( rowIdx );
+      for( IndexType i = 0; i < 5; i++ )
+        row.setElement( columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] );
    };
-   TNL::Pointers::synchronizeSmartPointersOnDevice< DeviceType >();
    TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f );
 
-   EXPECT_EQ( m->getElement( 0, 0 ), 11 );
-   EXPECT_EQ( m->getElement( 0, 1 ), 11 );
-   EXPECT_EQ( m->getElement( 0, 2 ), 11 );
-   EXPECT_EQ( m->getElement( 0, 3 ), 11 );
-   EXPECT_EQ( m->getElement( 0, 4 ), 11 );
-   EXPECT_EQ( m->getElement( 0, 5 ),  6 );
-   EXPECT_EQ( m->getElement( 0, 6 ),  7 );
-
-   EXPECT_EQ( m->getElement( 1, 0 ), 22 );
-   EXPECT_EQ( m->getElement( 1, 1 ), 22 );
-   EXPECT_EQ( m->getElement( 1, 2 ), 22 );
-   EXPECT_EQ( m->getElement( 1, 3 ), 22 );
-   EXPECT_EQ( m->getElement( 1, 4 ), 22 );
-   EXPECT_EQ( m->getElement( 1, 5 ), 13 );
-   EXPECT_EQ( m->getElement( 1, 6 ), 14 );
-
-   EXPECT_EQ( m->getElement( 2, 0 ), 15 );
-   EXPECT_EQ( m->getElement( 2, 1 ), 16 );
-   EXPECT_EQ( m->getElement( 2, 2 ), 33 );
-   EXPECT_EQ( m->getElement( 2, 3 ), 33 );
-   EXPECT_EQ( m->getElement( 2, 4 ), 33 );
-   EXPECT_EQ( m->getElement( 2, 5 ), 33 );
-   EXPECT_EQ( m->getElement( 2, 6 ), 33 );
+   EXPECT_EQ( m.getElement( 0, 0 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 2 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 3 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 4 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  6 );
+   EXPECT_EQ( m.getElement( 0, 6 ),  7 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 5 ), 13 );
+   EXPECT_EQ( m.getElement( 1, 6 ), 14 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 15 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 16 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 33 );
 }
 
 template< typename Matrix >
@@ -648,49 +644,49 @@ void test_AddRow()
    const IndexType rows = 6;
    const IndexType cols = 5;
 
-   TNL::Pointers::SharedPointer< Matrix > m( rows, cols );
+   Matrix m( rows, cols );
 
    RealType value = 1;
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < cols; j++ )
-         m->setElement( i, j, value++ );
+         m.setElement( i, j, value++ );
 
    // Check the added elements
-   EXPECT_EQ( m->getElement( 0, 0 ),  1 );
-   EXPECT_EQ( m->getElement( 0, 1 ),  2 );
-   EXPECT_EQ( m->getElement( 0, 2 ),  3 );
-   EXPECT_EQ( m->getElement( 0, 3 ),  4 );
-   EXPECT_EQ( m->getElement( 0, 4 ),  5 );
-
-   EXPECT_EQ( m->getElement( 1, 0 ),  6 );
-   EXPECT_EQ( m->getElement( 1, 1 ),  7 );
-   EXPECT_EQ( m->getElement( 1, 2 ),  8 );
-   EXPECT_EQ( m->getElement( 1, 3 ),  9 );
-   EXPECT_EQ( m->getElement( 1, 4 ), 10 );
-
-   EXPECT_EQ( m->getElement( 2, 0 ), 11 );
-   EXPECT_EQ( m->getElement( 2, 1 ), 12 );
-   EXPECT_EQ( m->getElement( 2, 2 ), 13 );
-   EXPECT_EQ( m->getElement( 2, 3 ), 14 );
-   EXPECT_EQ( m->getElement( 2, 4 ), 15 );
-
-   EXPECT_EQ( m->getElement( 3, 0 ), 16 );
-   EXPECT_EQ( m->getElement( 3, 1 ), 17 );
-   EXPECT_EQ( m->getElement( 3, 2 ), 18 );
-   EXPECT_EQ( m->getElement( 3, 3 ), 19 );
-   EXPECT_EQ( m->getElement( 3, 4 ), 20 );
-
-   EXPECT_EQ( m->getElement( 4, 0 ), 21 );
-   EXPECT_EQ( m->getElement( 4, 1 ), 22 );
-   EXPECT_EQ( m->getElement( 4, 2 ), 23 );
-   EXPECT_EQ( m->getElement( 4, 3 ), 24 );
-   EXPECT_EQ( m->getElement( 4, 4 ), 25 );
-
-   EXPECT_EQ( m->getElement( 5, 0 ), 26 );
-   EXPECT_EQ( m->getElement( 5, 1 ), 27 );
-   EXPECT_EQ( m->getElement( 5, 2 ), 28 );
-   EXPECT_EQ( m->getElement( 5, 3 ), 29 );
-   EXPECT_EQ( m->getElement( 5, 4 ), 30 );
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  4 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  5 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  9 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 10 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 11 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 15 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 20 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 21 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 22 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 23 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 26 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 27 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 28 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 29 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 30 );
 
    // Add new elements to the old elements with a multiplying factor applied to the old elements.
    /*
@@ -704,26 +700,7 @@ void test_AddRow()
     *    \ 78 81 84 87 90 /
     */
 
-    RealType row0 [ 5 ] = { 11, 11, 11, 11, 0 }; IndexType colIndexes0 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row1 [ 5 ] = { 22, 22, 22, 22, 0 }; IndexType colIndexes1 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row2 [ 5 ] = { 33, 33, 33, 33, 0 }; IndexType colIndexes2 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row3 [ 5 ] = { 44, 44, 44, 44, 0 }; IndexType colIndexes3 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row4 [ 5 ] = { 55, 55, 55, 55, 0 }; IndexType colIndexes4 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row5 [ 5 ] = { 66, 66, 66, 66, 0 }; IndexType colIndexes5 [ 5 ] = { 0, 1, 2, 3, 4 };
-    
-    IndexType row = 0;
-    IndexType elements = 5;
-    RealType thisRowMultiplicator = 0;
-    
-    // TODO: Fix this
-    /*m.addRow( row++, colIndexes0, row0, elements, thisRowMultiplicator++ );
-    m.addRow( row++, colIndexes1, row1, elements, thisRowMultiplicator++ );
-    m.addRow( row++, colIndexes2, row2, elements, thisRowMultiplicator++ );
-    m.addRow( row++, colIndexes3, row3, elements, thisRowMultiplicator++ );
-    m.addRow( row++, colIndexes4, row4, elements, thisRowMultiplicator++ );
-    m.addRow( row++, colIndexes5, row5, elements, thisRowMultiplicator++ );*/
-
-   Matrix* m_ptr = &m.template modifyData< DeviceType >();
+   auto matrix_view = m.getView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
       RealType values[ 6 ][ 5 ] {
          { 11, 11, 11, 11, 0 },
@@ -732,52 +709,51 @@ void test_AddRow()
          { 44, 44, 44, 44, 0 },
          { 55, 55, 55, 55, 0 },
          { 66, 66, 66, 66, 0 } };
-      auto row = m_ptr->getRow( rowIdx );
+      auto row = matrix_view.getRow( rowIdx );
       for( IndexType i = 0; i < 5; i++ )
       {
          RealType& val = row.getValue( i );
          val = rowIdx * val + values[ rowIdx ][ i ];
       }
    };
-   TNL::Pointers::synchronizeSmartPointersOnDevice< DeviceType >();
    TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f );
 
     
-    EXPECT_EQ( m->getElement( 0, 0 ),  11 );
-    EXPECT_EQ( m->getElement( 0, 1 ),  11 );
-    EXPECT_EQ( m->getElement( 0, 2 ),  11 );
-    EXPECT_EQ( m->getElement( 0, 3 ),  11 );
-    EXPECT_EQ( m->getElement( 0, 4 ),   0 );
-    
-    EXPECT_EQ( m->getElement( 1, 0 ),  28 );
-    EXPECT_EQ( m->getElement( 1, 1 ),  29 );
-    EXPECT_EQ( m->getElement( 1, 2 ),  30 );
-    EXPECT_EQ( m->getElement( 1, 3 ),  31 );
-    EXPECT_EQ( m->getElement( 1, 4 ),  10 );
-    
-    EXPECT_EQ( m->getElement( 2, 0 ),  55 );
-    EXPECT_EQ( m->getElement( 2, 1 ),  57 );
-    EXPECT_EQ( m->getElement( 2, 2 ),  59 );
-    EXPECT_EQ( m->getElement( 2, 3 ),  61 );
-    EXPECT_EQ( m->getElement( 2, 4 ),  30 );
-    
-    EXPECT_EQ( m->getElement( 3, 0 ),  92 );
-    EXPECT_EQ( m->getElement( 3, 1 ),  95 );
-    EXPECT_EQ( m->getElement( 3, 2 ),  98 );
-    EXPECT_EQ( m->getElement( 3, 3 ), 101 );
-    EXPECT_EQ( m->getElement( 3, 4 ),  60 );
-    
-    EXPECT_EQ( m->getElement( 4, 0 ), 139 );
-    EXPECT_EQ( m->getElement( 4, 1 ), 143 );
-    EXPECT_EQ( m->getElement( 4, 2 ), 147 );
-    EXPECT_EQ( m->getElement( 4, 3 ), 151 );
-    EXPECT_EQ( m->getElement( 4, 4 ), 100 );
-    
-    EXPECT_EQ( m->getElement( 5, 0 ), 196 );
-    EXPECT_EQ( m->getElement( 5, 1 ), 201 );
-    EXPECT_EQ( m->getElement( 5, 2 ), 206 );
-    EXPECT_EQ( m->getElement( 5, 3 ), 211 );
-    EXPECT_EQ( m->getElement( 5, 4 ), 150 );
+    EXPECT_EQ( m.getElement( 0, 0 ),  11 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  11 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  11 );
+    EXPECT_EQ( m.getElement( 0, 3 ),  11 );
+    EXPECT_EQ( m.getElement( 0, 4 ),   0 );
+    
+    EXPECT_EQ( m.getElement( 1, 0 ),  28 );
+    EXPECT_EQ( m.getElement( 1, 1 ),  29 );
+    EXPECT_EQ( m.getElement( 1, 2 ),  30 );
+    EXPECT_EQ( m.getElement( 1, 3 ),  31 );
+    EXPECT_EQ( m.getElement( 1, 4 ),  10 );
+    
+    EXPECT_EQ( m.getElement( 2, 0 ),  55 );
+    EXPECT_EQ( m.getElement( 2, 1 ),  57 );
+    EXPECT_EQ( m.getElement( 2, 2 ),  59 );
+    EXPECT_EQ( m.getElement( 2, 3 ),  61 );
+    EXPECT_EQ( m.getElement( 2, 4 ),  30 );
+    
+    EXPECT_EQ( m.getElement( 3, 0 ),  92 );
+    EXPECT_EQ( m.getElement( 3, 1 ),  95 );
+    EXPECT_EQ( m.getElement( 3, 2 ),  98 );
+    EXPECT_EQ( m.getElement( 3, 3 ), 101 );
+    EXPECT_EQ( m.getElement( 3, 4 ),  60 );
+    
+    EXPECT_EQ( m.getElement( 4, 0 ), 139 );
+    EXPECT_EQ( m.getElement( 4, 1 ), 143 );
+    EXPECT_EQ( m.getElement( 4, 2 ), 147 );
+    EXPECT_EQ( m.getElement( 4, 3 ), 151 );
+    EXPECT_EQ( m.getElement( 4, 4 ), 100 );
+    
+    EXPECT_EQ( m.getElement( 5, 0 ), 196 );
+    EXPECT_EQ( m.getElement( 5, 1 ), 201 );
+    EXPECT_EQ( m.getElement( 5, 2 ), 206 );
+    EXPECT_EQ( m.getElement( 5, 3 ), 211 );
+    EXPECT_EQ( m.getElement( 5, 4 ), 150 );
 }
 
 template< typename Matrix >
@@ -1263,8 +1239,6 @@ void test_SaveAndLoad()
     EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 14 );
     EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 );
     EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 );
-    
-    std::cout << "\nThis will create a file called '" << TEST_FILE_NAME << "' (of the matrix created in the test function), in .../tnl-dev/Debug/bin/\n\n";
 }
 
 template< typename Matrix >
@@ -1432,12 +1406,12 @@ TYPED_TEST( MatrixTest, addRowTest )
     test_AddRow< MatrixType >();
 }
 
-/*TYPED_TEST( MatrixTest, vectorProductTest )
+TYPED_TEST( MatrixTest, vectorProductTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
     
     test_VectorProduct< MatrixType >();
-}*/
+}
 
 TYPED_TEST( MatrixTest, addMatrixTest )
 {
-- 
GitLab


From 65d9b74cec3690588bce4dd3786c8a84e81565bc Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Mon, 6 Jan 2020 14:54:04 +0100
Subject: [PATCH 067/179] I

---
 src/TNL/Containers/Segments/CSR.hpp           |   3 +-
 src/TNL/Containers/Segments/SlicedEllpack.hpp |   6 +-
 src/TNL/Matrices/Dense.h                      |   6 +
 src/TNL/Matrices/Dense.hpp                    |  27 +++++
 src/TNL/Matrices/SparseMatrix.h               |   7 ++
 src/TNL/Matrices/SparseMatrix.hpp             | 107 +++++++++++++++++-
 src/UnitTests/Matrices/SparseMatrixCopyTest.h |  85 +++++++++++++-
 7 files changed, 234 insertions(+), 7 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index 8b8ddfff5..971754b5a 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -225,8 +225,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
       const IndexType end = offsetsView[ i + 1 ];
       RealType aux( zero );
       bool compute( true );
+      IndexType localIdx( 0 );
       for( IndexType j = begin; j < end && compute; j++  )
-         reduction( aux, fetch( i, j, compute, args... ) );
+         reduction( aux, fetch( i, localIdx++, j, compute, args... ) );
       keeper( i, aux );
    };
    Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index 76790f393..31f417df2 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -354,8 +354,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType end = begin + segmentSize;
          RealType aux( zero );
          bool compute( true );
+         IndexType localIdx( 0 );
          for( IndexType globalIdx = begin; globalIdx< end; globalIdx++  )
-            reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
+            reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) );
          keeper( segmentIdx, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -370,8 +371,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
          RealType aux( zero );
          bool compute( true );
+         IndexType localIdx( 0 );
          for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize  )
-            reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
+            reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) );
          keeper( segmentIdx, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index 90aa57170..2e283c9e8 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -183,6 +183,12 @@ class Dense : public Matrix< Real, Device, Index >
                 typename = typename Enabler< Device2 >::type >
       Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix );
 
+      template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ >
+      bool operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const;
+
+      template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ >
+      bool operator!=( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const;
+      
       void save( const String& fileName ) const;
 
       void load( const String& fileName );
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index fe11d6759..ecd5aec1c 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -994,6 +994,33 @@ Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Den
    throw Exceptions::NotImplementedError("Cross-device assignment for the Dense format is not implemented yet.");
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ >
+bool
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const
+{
+   return( this->getRows() == matrix.getRows() &&
+           this->getColumns() == matrix.getColumns() &&
+           this->getValues() == matrix.getValues() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ >
+bool
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator!=( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const
+{
+   return ! ( *this == matrix );
+}
 
 template< typename Real,
           typename Device,
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index c50f71612..75d917928 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -175,6 +175,13 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        */
       SparseMatrix& operator=( const SparseMatrix& matrix );
 
+      /**
+       * \brief Assignment of dense matrix
+       */
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder, typename RealAllocator_ >
+      SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix );
+      
+      
       /**
        * \brief Assignment of any other matrix type.
        * @param matrix
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 6c0655ce0..d38b9de34 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -183,7 +183,7 @@ getCompressedRowLengths( Vector& rowLengths ) const
    rowLengths.setSize( this->getRows() );
    rowLengths = 0;
    auto rowLengths_view = rowLengths.getView();
-   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
       return ( value != 0.0 );
    };
    auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
@@ -448,7 +448,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
    const auto columns_view = this->columnIndexes.getConstView();
    const auto values_view = this->values.getConstView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
-   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
+   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
       IndexType columnIdx = columns_view[ globalIdx ];
       if( columnIdx != paddingIndex_ )
          return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
@@ -615,7 +615,108 @@ operator=( const SparseMatrix& matrix )
    return *this;
 }
 
-// cross-device copy assignment
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder, typename RealAllocator_ >
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >&
+SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix )
+{
+   using RHSMatrix = Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >;
+   using RHSIndexType = typename RHSMatrix::IndexType;
+   using RHSRealType = typename RHSMatrix::RealType;
+   using RHSDeviceType = typename RHSMatrix::DeviceType;
+   using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType;
+
+   typename RHSMatrix::RowsCapacitiesType rowLengths;
+   matrix.getCompressedRowLengths( rowLengths );
+   this->setDimensions( matrix.getRows(), matrix.getColumns() );
+   this->setCompressedRowLengths( rowLengths );
+
+   // TODO: use getConstView when it works
+   const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView();
+   const IndexType paddingIndex = this->getPaddingIndex();
+   auto columns_view = this->columnIndexes.getView();
+   auto values_view = this->values.getView();
+   columns_view = paddingIndex;
+
+   if( std::is_same< DeviceType, RHSDeviceType >::value )
+   {
+      const auto this_segments_view = this->segments.getView();
+      const auto segments_view = this->segments.getView();
+      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable {
+         if( columnIndex != paddingIndex )
+         {
+            IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx );
+            columns_view[ thisGlobalIdx ] = columnIndex;
+            values_view[ thisGlobalIdx ] = value;
+         }
+      };
+      matrix.forAllRows( f );
+   }
+   else
+   {
+      const IndexType maxRowLength = max( rowLengths );
+      const IndexType bufferRowsCount( 128 );
+      const size_t bufferSize = bufferRowsCount * maxRowLength;
+      Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize );
+      Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType, RHSIndexAllocatorType > matrixColumnsBuffer( bufferSize );
+      Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize );
+      Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize );
+      auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
+      auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView();
+      auto thisValuesBuffer_view = thisValuesBuffer.getView();
+      auto thisColumnsBuffer_view = thisColumnsBuffer.getView();
+
+      IndexType baseRow( 0 );
+      const IndexType rowsCount = this->getRows();
+      while( baseRow < rowsCount )
+      {
+         const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
+         thisColumnsBuffer = paddingIndex;
+         matrixColumnsBuffer_view = paddingIndex;
+
+         ////
+         // Copy matrix elements into buffer
+         auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable {
+            if( columnIndex != paddingIndex )
+            {
+               const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
+               matrixColumnsBuffer_view[ bufferIdx ] = columnIndex;
+               matrixValuesBuffer_view[ bufferIdx ] = value;
+            }
+         };
+         matrix.forRows( baseRow, lastRow, f1 );
+
+         ////
+         // Copy the source matrix buffer to this matrix buffer
+         thisValuesBuffer_view = matrixValuesBuffer_view;
+         thisColumnsBuffer_view = matrixColumnsBuffer_view;
+
+         ////
+         // Copy matrix elements from the buffer to the matrix
+         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value  ) mutable {
+            const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
+            const IndexType column = thisColumnsBuffer_view[ bufferIdx ];
+            if( column != paddingIndex )
+            {
+               columnIndex = column;
+               value = thisValuesBuffer_view[ bufferIdx ];
+            }
+         };
+         this->forRows( baseRow, lastRow, f2 );
+         baseRow += bufferRowsCount;
+      }
+      //std::cerr << "This matrix = " << std::endl << *this << std::endl;
+   }
+   return *this;
+   
+}
+
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index f00daf1f3..6c4f8b261 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -14,6 +14,7 @@
 
 #include <TNL/Matrices/SparseMatrix.h>
 #include <TNL/Matrices/MatrixType.h>
+#include <TNL/Matrices/Dense.h>
 #include <TNL/Containers/Segments/CSR.h>
 #include <TNL/Containers/Segments/Ellpack.h>
 #include <TNL/Containers/Segments/SlicedEllpack.h>
@@ -436,6 +437,55 @@ void testConversion()
    }
 }
 
+template< typename Matrix >
+void denseMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   
+   using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >;
+   using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >;
+   
+   const IndexType rows( 10 ), columns( 10 );
+   DenseHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j <= i; j++ )
+         hostMatrix( i, j ) = i + j;
+   
+   Matrix matrix;
+   matrix = hostMatrix;
+   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+   
+#ifdef HAVE_CUDA
+   DenseCuda cudaMatrix;
+   cudaMatrix = hostMatrix;
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+#endif
+}
+
 TEST( SparseMatrixCopyTest, CSR_HostToHost )
 {
    testCopyAssignment< CSR_host, CSR_host >();
@@ -568,6 +618,39 @@ TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda )
 }
 #endif
 
-#endif
+// Dense matrix assignment test
+TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_host )
+{
+   denseMatrixAssignment< CSR_host >();
+}
+
+TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_Ellpack_host )
+{
+   denseMatrixAssignment< E_host >();
+}
+
+TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_SlicedEllpack_host )
+{
+   denseMatrixAssignment< SE_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_cuda )
+{
+   denseMatrixAssignment< CSR_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_Ellpack_cuda )
+{
+   denseMatrixAssignment< E_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_SlicedEllpack_cuda )
+{
+   denseMatrixAssignment< SE_cuda >();
+}
+#endif // HAVE_CUDA
+
+#endif //HAVE_GTEST
 
 #include "../main.h"
-- 
GitLab


From ad33eecb09bf3f85f18eeb921e8a2075203d3ec1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 6 Jan 2020 21:20:22 +0100
Subject: [PATCH 068/179] Implemented dense to sparse matrix assignement.

---
 src/TNL/Containers/Segments/CSR.hpp           |  2 +-
 src/TNL/Containers/Segments/SlicedEllpack.hpp |  4 +-
 src/TNL/Matrices/Dense.h                      | 16 +----
 src/TNL/Matrices/Dense.hpp                    | 40 ++++--------
 src/TNL/Matrices/SparseMatrix.h               |  1 +
 src/TNL/Matrices/SparseMatrix.hpp             | 62 ++++++++++---------
 src/TNL/Matrices/SparseMatrixView.hpp         |  2 +-
 src/UnitTests/Matrices/SparseMatrixCopyTest.h | 18 +++---
 8 files changed, 66 insertions(+), 79 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index 971754b5a..3581748fa 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -218,7 +218,7 @@ void
 CSR< Device, Index, IndexAllocator >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    const auto offsetsView = this->offsets.getConstView();
    auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
       const IndexType begin = offsetsView[ i ];
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index 31f417df2..62e2ca7d5 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -127,7 +127,7 @@ setSegmentsSizes( const SizesHolder& sizes )
    const auto sizes_view = sizes.getConstView();
    auto slices_view = this->sliceOffsets.getView();
    auto slice_segment_size_view = this->sliceSegmentSizes.getView();
-   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx, bool& compute ) -> IndexType {
+   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) -> IndexType {
       if( globalIdx < _size )
          return sizes_view[ globalIdx ];
       return 0;
@@ -341,7 +341,7 @@ void
 SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
    const auto sliceOffsets_view = this->sliceOffsets.getConstView();
    if( RowMajorOrder )
diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index 2e283c9e8..757fa4eae 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -30,15 +30,6 @@ template< typename Real = double,
           typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
 class Dense : public Matrix< Real, Device, Index >
 {
-   private:
-      // convenient template alias for controlling the selection of copy-assignment operator
-      template< typename Device2 >
-      using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
-
-      // friend class will be needed for templated assignment operators
-      //template< typename Real2, typename Device2, typename Index2 >
-      //friend class Dense;
-
    public:
       using RealType = Real;
       using DeviceType = Device;
@@ -176,12 +167,11 @@ class Dense : public Matrix< Real, Device, Index >
                                 const RealType& omega = 1.0 ) const;
 
       // copy assignment
-      Dense& operator=( const Dense& matrix );
+      //Dense& operator=( const Dense& matrix );
 
       // cross-device copy assignment
-      template< typename Real2, typename Device2, typename Index2,
-                typename = typename Enabler< Device2 >::type >
-      Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix );
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAlocator_ >
+      Dense& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAlocator_ >& matrix );
 
       template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ >
       bool operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const;
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index ecd5aec1c..7517c6b0e 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -373,7 +373,7 @@ forRows( IndexType first, IndexType last, Function& function ) const
 {
    const auto values_view = this->values.getConstView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool {
-      function( rowIdx, columnIdx, values_view[ globalIdx ] );
+      function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
       return true;
    };
    this->segments.forSegments( first, last, f );
@@ -959,39 +959,25 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORItera
    x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum );
 }
 
-
-// copy assignment
 template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
-Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Dense& matrix )
-{
-   this->setLike( matrix );
-   this->values = matrix.values;
-   return *this;
-}
-
-// cross-device copy assignment
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
-   template< typename Real2, typename Device2, typename Index2, typename >
-Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
-Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Dense< Real2, Device2, Index2 >& matrix )
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix )
 {
-   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
-                  "unknown device" );
-   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
-                  "unknown device" );
-
-   this->setLike( matrix );
-
-   throw Exceptions::NotImplementedError("Cross-device assignment for the Dense format is not implemented yet.");
+   if( RowMajorOrder == RowMajorOrder_ )
+   {
+      this->setLike( matrix );
+      this->values = matrix.getValues();
+   }
+   else
+   {
+      
+   }
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 75d917928..44883a124 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -16,6 +16,7 @@
 #include <TNL/Containers/Segments/CSR.h>
 #include <TNL/Matrices/SparseMatrixRowView.h>
 #include <TNL/Matrices/SparseMatrixView.h>
+#include <TNL/Matrices/Dense.h>
 
 namespace TNL {
 namespace Matrices {
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index d38b9de34..6aa75995f 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -448,10 +448,10 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
    const auto columns_view = this->columnIndexes.getConstView();
    const auto values_view = this->values.getConstView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
-   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
+   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) ) {
       IndexType columnIdx = columns_view[ globalIdx ];
       if( columnIdx != paddingIndex_ )
-         return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
+         return fetch( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
       return zero;
    };
    this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );
@@ -594,7 +594,7 @@ performSORIteration( const Vector1& b,
                      Vector2& x,
                      const RealType& omega ) const
 {
-
+   return false;
 }
 
 // copy assignment
@@ -624,7 +624,8 @@ template< typename Real,
           typename IndexAllocator >
    template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder, typename RealAllocator_ >
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >&
-SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix )
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix )
 {
    using RHSMatrix = Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >;
    using RHSIndexType = typename RHSMatrix::IndexType;
@@ -632,27 +633,29 @@ SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, Rea
    using RHSDeviceType = typename RHSMatrix::DeviceType;
    using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType;
 
-   typename RHSMatrix::RowsCapacitiesType rowLengths;
+   Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths;
    matrix.getCompressedRowLengths( rowLengths );
-   this->setDimensions( matrix.getRows(), matrix.getColumns() );
+   this->setLike( matrix );
    this->setCompressedRowLengths( rowLengths );
+   Containers::Vector< IndexType, DeviceType, IndexType > rowLocalIndexes( matrix.getRows() );
+   rowLocalIndexes = 0;
 
    // TODO: use getConstView when it works
    const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView();
    const IndexType paddingIndex = this->getPaddingIndex();
    auto columns_view = this->columnIndexes.getView();
    auto values_view = this->values.getView();
+   auto rowLocalIndexes_view = rowLocalIndexes.getView();
    columns_view = paddingIndex;
 
    if( std::is_same< DeviceType, RHSDeviceType >::value )
    {
-      const auto this_segments_view = this->segments.getView();
       const auto segments_view = this->segments.getView();
-      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable {
-         if( columnIndex != paddingIndex )
+      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType columnIdx, RHSIndexType globalIndex, const RHSRealType& value ) mutable {
+         if( value != 0.0 )
          {
-            IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx );
-            columns_view[ thisGlobalIdx ] = columnIndex;
+            IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, rowLocalIndexes_view[ rowIdx ]++ );
+            columns_view[ thisGlobalIdx ] = columnIdx;
             values_view[ thisGlobalIdx ] = value;
          }
       };
@@ -660,15 +663,13 @@ SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, Rea
    }
    else
    {
-      const IndexType maxRowLength = max( rowLengths );
+      const IndexType maxRowLength = matrix.getColumns();
       const IndexType bufferRowsCount( 128 );
       const size_t bufferSize = bufferRowsCount * maxRowLength;
       Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize );
-      Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType, RHSIndexAllocatorType > matrixColumnsBuffer( bufferSize );
       Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize );
       Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize );
       auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
-      auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView();
       auto thisValuesBuffer_view = thisValuesBuffer.getView();
       auto thisColumnsBuffer_view = thisColumnsBuffer.getView();
 
@@ -678,34 +679,40 @@ SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, Rea
       {
          const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
          thisColumnsBuffer = paddingIndex;
-         matrixColumnsBuffer_view = paddingIndex;
 
          ////
          // Copy matrix elements into buffer
          auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable {
-            if( columnIndex != paddingIndex )
-            {
-               const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
-               matrixColumnsBuffer_view[ bufferIdx ] = columnIndex;
-               matrixValuesBuffer_view[ bufferIdx ] = value;
-            }
+            const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
+            matrixValuesBuffer_view[ bufferIdx ] = value;
          };
          matrix.forRows( baseRow, lastRow, f1 );
 
          ////
          // Copy the source matrix buffer to this matrix buffer
          thisValuesBuffer_view = matrixValuesBuffer_view;
-         thisColumnsBuffer_view = matrixColumnsBuffer_view;
 
          ////
          // Copy matrix elements from the buffer to the matrix
+         const IndexType matrix_columns = this->getColumns();
          auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value  ) mutable {
-            const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
-            const IndexType column = thisColumnsBuffer_view[ bufferIdx ];
-            if( column != paddingIndex )
+            RealType inValue( 0.0 );
+            IndexType bufferIdx, column( rowLocalIndexes_view[ rowIdx ] );
+            while( inValue == 0.0 && column < matrix_columns )
             {
-               columnIndex = column;
-               value = thisValuesBuffer_view[ bufferIdx ];
+               bufferIdx = ( rowIdx - baseRow ) * maxRowLength + column++;
+               inValue = thisValuesBuffer_view[ bufferIdx ];
+            }
+            rowLocalIndexes_view[ rowIdx ] = column;
+            if( inValue == 0.0 )
+            {
+               columnIndex = paddingIndex;
+               value = 0.0;
+            }
+            else
+            {
+               columnIndex = column - 1;
+               value = inValue;
             }
          };
          this->forRows( baseRow, lastRow, f2 );
@@ -749,7 +756,6 @@ operator=( const RHSMatrix& matrix )
 
    if( std::is_same< DeviceType, RHSDeviceType >::value )
    {
-      const auto this_segments_view = this->segments.getView();
       const auto segments_view = this->segments.getView();
       auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable {
          if( columnIndex != paddingIndex )
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index df136388e..d836fe5e9 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -494,7 +494,7 @@ performSORIteration( const Vector1& b,
                      Vector2& x,
                      const RealType& omega ) const
 {
-
+   return false;
 }
 
 template< typename Real,
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index 6c4f8b261..8677443b2 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -443,22 +443,22 @@ void denseMatrixAssignment()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-   
+
    using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >;
    using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >;
-   
+
    const IndexType rows( 10 ), columns( 10 );
    DenseHost hostMatrix( rows, columns );
    for( IndexType i = 0; i < columns; i++ )
       for( IndexType j = 0; j <= i; j++ )
          hostMatrix( i, j ) = i + j;
-   
+
    Matrix matrix;
    matrix = hostMatrix;
    using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
    RowCapacitiesType rowCapacities;
    matrix.getCompressedRowLengths( rowCapacities );
-   RowCapacitiesType exactRowLengths{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+   RowCapacitiesType exactRowLengths{ 0, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    EXPECT_EQ( rowCapacities, exactRowLengths );
    for( IndexType i = 0; i < columns; i++ )
       for( IndexType j = 0; j < rows; j++ )
@@ -468,10 +468,14 @@ void denseMatrixAssignment()
          else
             EXPECT_EQ( matrix.getElement( i, j ), i + j );
       }
-   
+
 #ifdef HAVE_CUDA
-   DenseCuda cudaMatrix;
-   cudaMatrix = hostMatrix;
+   DenseCuda cudaMatrix( rows, columns );
+   //cudaMatrix = hostMatrix;
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j <= i; j++ )
+         cudaMatrix.setElement( i, j, i + j );
+
    matrix = cudaMatrix;
    matrix.getCompressedRowLengths( rowCapacities );
    EXPECT_EQ( rowCapacities, exactRowLengths );
-- 
GitLab


From fe457c9765fffac0090a51fb48600a9238c24857 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 6 Jan 2020 22:04:40 +0100
Subject: [PATCH 069/179] Fixed segments reduction lambda function parameters.

---
 src/TNL/Containers/Segments/CSRView.hpp            | 5 +++--
 src/TNL/Containers/Segments/EllpackView.hpp        | 8 +++++---
 src/TNL/Containers/Segments/SlicedEllpackView.hpp  | 8 +++++---
 src/TNL/Matrices/SparseMatrix.hpp                  | 6 +++---
 src/UnitTests/Containers/Segments/SegmentsTest.hpp | 2 +-
 src/UnitTests/Matrices/SparseMatrixTest.hpp        | 4 ++--
 6 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
index bbed8e3cb..2d2b58331 100644
--- a/src/TNL/Containers/Segments/CSRView.hpp
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -195,15 +195,16 @@ void
 CSRView< Device, Index >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    const auto offsetsView = this->offsets.getConstView();
    auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
       const IndexType begin = offsetsView[ i ];
       const IndexType end = offsetsView[ i + 1 ];
       RealType aux( zero );
+      IndexType localIdx( 0 );
       bool compute( true );
       for( IndexType j = begin; j < end && compute; j++  )
-         reduction( aux, fetch( i, j, compute, args... ) );
+         reduction( aux, fetch( i, localIdx++, j, compute, args... ) );
       keeper( i, aux );
    };
    Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp
index dc6bd485d..21be88654 100644
--- a/src/TNL/Containers/Segments/EllpackView.hpp
+++ b/src/TNL/Containers/Segments/EllpackView.hpp
@@ -245,7 +245,7 @@ void
 EllpackView< Device, Index, RowMajorOrder, Alignment >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    if( RowMajorOrder )
    {
       const IndexType segmentSize = this->segmentSize;
@@ -253,9 +253,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType begin = i * segmentSize;
          const IndexType end = begin + segmentSize;
          RealType aux( zero );
+         IndexType localIdx( 0 );
          bool compute( true );
          for( IndexType j = begin; j < end && compute; j++  )
-            reduction( aux, fetch( i, j, compute, args... ) );
+            reduction( aux, fetch( i, localIdx++, j, compute, args... ) );
          keeper( i, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -268,9 +269,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType begin = i;
          const IndexType end = storageSize;
          RealType aux( zero );
+         IndexType localIdx( 0 );
          bool compute( true );
          for( IndexType j = begin; j < end && compute; j += alignedSize  )
-            reduction( aux, fetch( i, j, compute, args... ) );
+            reduction( aux, fetch( i, localIdx++, j, compute, args... ) );
          keeper( i, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
index 82570664f..5f9cbdee3 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
@@ -293,7 +293,7 @@ void
 SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
    const auto sliceOffsets_view = this->sliceOffsets.getConstView();
    if( RowMajorOrder )
@@ -305,9 +305,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
          const IndexType end = begin + segmentSize;
          RealType aux( zero );
+         IndexType localIdx( 0 );
          bool compute( true );
          for( IndexType globalIdx = begin; globalIdx< end; globalIdx++  )
-            reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
+            reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) );
          keeper( segmentIdx, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -321,9 +322,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
          const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
          RealType aux( zero );
+         IndexType localIdx( 0 );
          bool compute( true );
          for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize  )
-            reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
+            reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) );
          keeper( segmentIdx, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 6aa75995f..8dbe53f4d 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -417,12 +417,12 @@ vectorProduct( const InVector& inVector,
    const auto valuesView = this->values.getConstView();
    const auto columnIndexesView = this->columnIndexes.getConstView();
    const IndexType paddingIndex = this->getPaddingIndex();
-   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset, bool& compute ) -> RealType {
-      const IndexType column = columnIndexesView[ offset ];
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> RealType {
+      const IndexType column = columnIndexesView[ globalIdx ];
       compute = ( column != paddingIndex );
       if( ! compute )
          return 0.0;
-      return valuesView[ offset ] * inVectorView[ column ];
+      return valuesView[ globalIdx ] * inVectorView[ column ];
    };
    auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
       sum += value;
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
index 6189c2e9a..8320fafe5 100644
--- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp
+++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
@@ -143,7 +143,7 @@ void test_AllReduction_MaximumInSegments()
 
    const auto v_view = v.getConstView();
    auto result_view = result.getView();
-   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx, bool& compute ) -> IndexType {
+   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) -> IndexType {
       return v_view[ globalIdx ];
    };
    auto reduce = [] __cuda_callable__ ( IndexType& a, const IndexType b ) {
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp
index bf261aa84..b0a9fcb00 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp
@@ -1214,7 +1214,7 @@ void test_RowsReduction()
    // Compute number of non-zero elements in rows.
    typename Matrix::RowsCapacitiesType rowLengths( rows );
    auto rowLengths_view = rowLengths.getView();
-   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
       return ( value != 0.0 );
    };
    auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
@@ -1232,7 +1232,7 @@ void test_RowsReduction()
    // Compute max norm
    TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows );
    auto rowSums_view = rowSums.getView();
-   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
       return abs( value );
    };
    auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
-- 
GitLab


From 50b1a44a6faecbedeb14e761dfb726281e5a2d7f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 6 Jan 2020 22:45:42 +0100
Subject: [PATCH 070/179] Added dense matrix assignment test.

---
 src/UnitTests/Matrices/DenseMatrixTest.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index 897861f7f..2ddd19c7a 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -1166,6 +1166,12 @@ void test_PerformSORIteration()
     EXPECT_EQ( xVector[ 3 ], 0.3671875 );
 }
 
+template< typename Matrix >
+void test_AssignmentOperator()
+{
+   EXPECT_EQ( 1, 0 );
+}
+
 template< typename Matrix >
 void test_SaveAndLoad()
 {
-- 
GitLab


From 74c5c158f7e76b7d6bad578de8ab4290715a9ca8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Tue, 7 Jan 2020 23:10:48 +0100
Subject: [PATCH 071/179] Implemented dense matrix assignment operator.

---
 src/TNL/Matrices/Dense.h                 |  3 +-
 src/TNL/Matrices/Dense.hpp               | 98 ++++++++++++++++++++++--
 src/UnitTests/Matrices/DenseMatrixTest.h | 51 +++++++++++-
 3 files changed, 143 insertions(+), 9 deletions(-)

diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index 757fa4eae..9c05297d1 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -34,7 +34,8 @@ class Dense : public Matrix< Real, Device, Index >
       using RealType = Real;
       using DeviceType = Device;
       using IndexType = Index;
-      using BaseType = Matrix< Real, Device, Index >;
+      using RealAllocatorType = RealAllocator;
+      using BaseType = Matrix< Real, Device, Index, RealAllocator >;
       using ValuesType = typename BaseType::ValuesVector;
       using ValuesViewType = typename ValuesType::ViewType;
       using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >;
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 7517c6b0e..7a6c4becc 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -377,7 +377,6 @@ forRows( IndexType first, IndexType last, Function& function ) const
       return true;
    };
    this->segments.forSegments( first, last, f );
-
 }
 
 template< typename Real,
@@ -392,11 +391,10 @@ forRows( IndexType first, IndexType last, Function& function )
 {
    auto values_view = this->values.getView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool {
-      function( rowIdx, columnIdx, values_view[ globalIdx ] );
+      function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
       return true;
    };
    this->segments.forSegments( first, last, f );
-
 }
 
 template< typename Real,
@@ -959,6 +957,50 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORItera
    x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum );
 }
 
+/*template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix )
+{
+   const IndexType bufferRowsCount( 128 );
+   const IndexType columns = this->getColumns();
+   const size_t bufferSize = bufferRowsCount * columns;
+   Containers::Vector< RealType, Device, IndexType, RealAllocatorType > sourceValuesBuffer( bufferSize );
+   Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize );
+   auto sourceValuesBuffer_view = sourceValuesBuffer.getView();
+   auto destinationValuesBuffer_view = destinationValuesBuffer.getView();
+
+   IndexType baseRow( 0 );
+   const IndexType rowsCount = this->getRows();
+   while( baseRow < rowsCount )
+   {
+      const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
+
+      ////
+      // Copy matrix elements into buffer
+      auto f1 = [=] __cuda_callable__ ( Index rowIdx, Index columnIdx, Index globalIdx, const Real& value ) mutable {
+         const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
+         sourceValuesBuffer_view[ bufferIdx ] = value;
+      };
+      matrix.forRows( baseRow, lastRow, f1 );
+      destinationValuesBuffer = sourceValuesBuffer;
+
+      ////
+      // Copy buffer to this matrix
+      auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable {
+         const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
+         value = destinationValuesBuffer_view[ bufferIdx ];
+      };
+      this->forRows( baseRow, lastRow, f2 );
+      baseRow += bufferRowsCount;
+   }
+   return *this;
+}*/
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -969,15 +1011,57 @@ Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix )
 {
+   this->setLike( matrix );
    if( RowMajorOrder == RowMajorOrder_ )
-   {
-      this->setLike( matrix );
       this->values = matrix.getValues();
-   }
    else
    {
-      
+      if( std::is_same< DeviceType, Device_ >::value )
+      {
+         auto this_view = this->getView();
+         auto f = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable {
+            this_view.getRow( rowIdx ).setElement( columnIdx, value );
+         };
+         matrix.forAllRows( f );
+      }
+      else
+      {
+         const IndexType bufferRowsCount( 128 );
+         const IndexType columns = this->getColumns();
+         const size_t bufferSize = bufferRowsCount * columns;
+         Containers::Vector< RealType, Device_, IndexType, RealAllocator_ > sourceValuesBuffer( bufferSize );
+         Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize );
+         auto sourceValuesBuffer_view = sourceValuesBuffer.getView();
+         auto destinationValuesBuffer_view = destinationValuesBuffer.getView();
+
+         IndexType baseRow( 0 );
+         const IndexType rowsCount = this->getRows();
+         while( baseRow < rowsCount )
+         {
+            const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
+
+            ////
+            // Copy matrix elements into buffer
+            auto f1 = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable {
+               const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
+               sourceValuesBuffer_view[ bufferIdx ] = value;
+            };
+            matrix.forRows( baseRow, lastRow, f1 );
+
+            destinationValuesBuffer = sourceValuesBuffer;
+
+            ////
+            // Copy buffer to this matrix
+            auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable {
+               const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
+               value = destinationValuesBuffer_view[ bufferIdx ];
+            };
+            this->forRows( baseRow, lastRow, f2 );
+            baseRow += bufferRowsCount;
+         }
+      }
    }
+   return *this;
 }
 
 template< typename Real,
diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index 2ddd19c7a..686602ebd 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -1169,9 +1169,51 @@ void test_PerformSORIteration()
 template< typename Matrix >
 void test_AssignmentOperator()
 {
-   EXPECT_EQ( 1, 0 );
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >;
+   using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   DenseHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j <= i; j++ )
+         hostMatrix( i, j ) = i + j;
+
+   Matrix matrix( rows, columns );
+   matrix.getValues() = 0.0;
+   matrix = hostMatrix;
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+
+#ifdef HAVE_CUDA
+   DenseCuda cudaMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j <= i; j++ )
+         cudaMatrix.setElement( i, j, i + j );
+
+   matrix.getValues() = 0.0;
+   matrix = cudaMatrix;
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+#endif
 }
 
+
 template< typename Matrix >
 void test_SaveAndLoad()
 {
@@ -1426,6 +1468,13 @@ TYPED_TEST( MatrixTest, addMatrixTest )
     test_AddMatrix< MatrixType >();
 }
 
+TYPED_TEST( MatrixTest, assignmentOperatorTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AssignmentOperator< MatrixType >();
+}
+
 TYPED_TEST( MatrixTest, saveAndLoadTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-- 
GitLab


From 09492f416df5bc2883246727b25e3eda67ddc544 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 8 Jan 2020 15:23:13 +0100
Subject: [PATCH 072/179] Temporarily disabling few dense matrix unit tests.

---
 src/UnitTests/Matrices/DenseMatrixTest.h | 355 ++++++++++++-----------
 1 file changed, 178 insertions(+), 177 deletions(-)

diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index 686602ebd..8c3132caf 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -26,7 +26,7 @@ using Dense_cuda_int = TNL::Matrices::Dense< int, TNL::Devices::Cuda, int >;
 
 static const char* TEST_FILE_NAME = "test_DenseMatrixTest.tnl";
 
-#ifdef HAVE_GTEST 
+#ifdef HAVE_GTEST
 #include <type_traits>
 
 #include <gtest/gtest.h>
@@ -36,7 +36,7 @@ void host_test_GetType()
 {
     MatrixHostFloat mtrxHostFloat;
     MatrixHostInt mtrxHostInt;
-    
+
     EXPECT_EQ( mtrxHostFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Host, int >" ) );
     EXPECT_EQ( mtrxHostInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Host, int >" ) );
 }
@@ -57,13 +57,13 @@ void test_SetDimensions()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
     const IndexType rows = 9;
     const IndexType cols = 8;
-    
+
     Matrix m;
     m.setDimensions( rows, cols );
-    
+
     EXPECT_EQ( m.getRows(), 9 );
     EXPECT_EQ( m.getColumns(), 8 );
 }
@@ -74,20 +74,20 @@ void test_SetLike()
     using RealType = typename Matrix1::RealType;
     using DeviceType = typename Matrix1::DeviceType;
     using IndexType = typename Matrix1::IndexType;
-    
+
     const IndexType rows = 8;
     const IndexType cols = 7;
-    
+
     Matrix1 m1;
     m1.reset();
     m1.setDimensions( rows + 1, cols + 2 );
-    
+
     Matrix2 m2;
     m2.reset();
     m2.setDimensions( rows, cols );
-    
+
     m1.setLike( m2 );
-    
+
     EXPECT_EQ( m1.getRows(), m2.getRows() );
     EXPECT_EQ( m1.getColumns(), m2.getColumns() );
 }
@@ -150,14 +150,14 @@ void test_GetRowLength()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
     const IndexType rows = 8;
     const IndexType cols = 7;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
-    
+
     EXPECT_EQ( m.getRowLength( 0 ), 7 );
     EXPECT_EQ( m.getRowLength( 1 ), 7 );
     EXPECT_EQ( m.getRowLength( 2 ), 7 );
@@ -174,14 +174,14 @@ void test_GetNumberOfMatrixElements()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
     const IndexType rows = 7;
     const IndexType cols = 6;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
-    
+
     EXPECT_EQ( m.getNumberOfMatrixElements(), 42 );
 }
 
@@ -191,7 +191,7 @@ void test_GetNumberOfNonzeroMatrixElements()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
 /*
  * Sets up the following 7x6 dense matrix:
  *
@@ -205,19 +205,19 @@ void test_GetNumberOfNonzeroMatrixElements()
  */
     const IndexType rows = 7;
     const IndexType cols = 6;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++ )
             m.setElement( i, j, value++ );
-    
+
     m.setElement( 0, 0, 0); // Set the first element of the diagonal to 0.
     m.setElement( 6, 5, 0); // Set the last element of the diagonal to 0.
-    
+
     EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 40 );
 }
 
@@ -227,7 +227,7 @@ void test_Reset()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
 /*
  * Sets up the following 5x4 dense matrix:
  *
@@ -239,12 +239,12 @@ void test_Reset()
  */
     const IndexType rows = 5;
     const IndexType cols = 4;
-    
+
     Matrix m;
     m.setDimensions( rows, cols );
-    
+
     m.reset();
-    
+
     EXPECT_EQ( m.getRows(), 0 );
     EXPECT_EQ( m.getColumns(), 0 );
 }
@@ -254,7 +254,7 @@ void test_SetValue()
 {
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;  
+    using IndexType = typename Matrix::IndexType;
 /*
  * Sets up the following 7x6 dense matrix:
  *
@@ -268,110 +268,110 @@ void test_SetValue()
  */
     const IndexType rows = 7;
     const IndexType cols = 6;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++ )
             m.setElement( i, j, value++ );
-    
+
     EXPECT_EQ( m.getElement( 0, 0 ),  1 );
     EXPECT_EQ( m.getElement( 0, 1 ),  2 );
     EXPECT_EQ( m.getElement( 0, 2 ),  3 );
     EXPECT_EQ( m.getElement( 0, 3 ),  4 );
     EXPECT_EQ( m.getElement( 0, 4 ),  5 );
     EXPECT_EQ( m.getElement( 0, 5 ),  6 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ),  7 );
     EXPECT_EQ( m.getElement( 1, 1 ),  8 );
     EXPECT_EQ( m.getElement( 1, 2 ),  9 );
     EXPECT_EQ( m.getElement( 1, 3 ), 10 );
     EXPECT_EQ( m.getElement( 1, 4 ), 11 );
     EXPECT_EQ( m.getElement( 1, 5 ), 12 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ), 13 );
     EXPECT_EQ( m.getElement( 2, 1 ), 14 );
     EXPECT_EQ( m.getElement( 2, 2 ), 15 );
     EXPECT_EQ( m.getElement( 2, 3 ), 16 );
     EXPECT_EQ( m.getElement( 2, 4 ), 17 );
     EXPECT_EQ( m.getElement( 2, 5 ), 18 );
-    
+
     EXPECT_EQ( m.getElement( 3, 0 ), 19 );
     EXPECT_EQ( m.getElement( 3, 1 ), 20 );
     EXPECT_EQ( m.getElement( 3, 2 ), 21 );
     EXPECT_EQ( m.getElement( 3, 3 ), 22 );
     EXPECT_EQ( m.getElement( 3, 4 ), 23 );
     EXPECT_EQ( m.getElement( 3, 5 ), 24 );
-    
+
     EXPECT_EQ( m.getElement( 4, 0 ), 25 );
     EXPECT_EQ( m.getElement( 4, 1 ), 26 );
     EXPECT_EQ( m.getElement( 4, 2 ), 27 );
     EXPECT_EQ( m.getElement( 4, 3 ), 28 );
     EXPECT_EQ( m.getElement( 4, 4 ), 29 );
     EXPECT_EQ( m.getElement( 4, 5 ), 30 );
-    
+
     EXPECT_EQ( m.getElement( 5, 0 ), 31 );
     EXPECT_EQ( m.getElement( 5, 1 ), 32 );
     EXPECT_EQ( m.getElement( 5, 2 ), 33 );
     EXPECT_EQ( m.getElement( 5, 3 ), 34 );
     EXPECT_EQ( m.getElement( 5, 4 ), 35 );
     EXPECT_EQ( m.getElement( 5, 5 ), 36 );
-    
+
     EXPECT_EQ( m.getElement( 6, 0 ), 37 );
     EXPECT_EQ( m.getElement( 6, 1 ), 38 );
     EXPECT_EQ( m.getElement( 6, 2 ), 39 );
     EXPECT_EQ( m.getElement( 6, 3 ), 40 );
     EXPECT_EQ( m.getElement( 6, 4 ), 41 );
     EXPECT_EQ( m.getElement( 6, 5 ), 42 );
-    
+
     // Set the values of all elements to a certain number
     m.setValue( 42 );
-    
+
     EXPECT_EQ( m.getElement( 0, 0 ), 42 );
     EXPECT_EQ( m.getElement( 0, 1 ), 42 );
     EXPECT_EQ( m.getElement( 0, 2 ), 42 );
     EXPECT_EQ( m.getElement( 0, 3 ), 42 );
     EXPECT_EQ( m.getElement( 0, 4 ), 42 );
     EXPECT_EQ( m.getElement( 0, 5 ), 42 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ), 42 );
     EXPECT_EQ( m.getElement( 1, 1 ), 42 );
     EXPECT_EQ( m.getElement( 1, 2 ), 42 );
     EXPECT_EQ( m.getElement( 1, 3 ), 42 );
     EXPECT_EQ( m.getElement( 1, 4 ), 42 );
     EXPECT_EQ( m.getElement( 1, 5 ), 42 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ), 42 );
     EXPECT_EQ( m.getElement( 2, 1 ), 42 );
     EXPECT_EQ( m.getElement( 2, 2 ), 42 );
     EXPECT_EQ( m.getElement( 2, 3 ), 42 );
     EXPECT_EQ( m.getElement( 2, 4 ), 42 );
     EXPECT_EQ( m.getElement( 2, 5 ), 42 );
-    
+
     EXPECT_EQ( m.getElement( 3, 0 ), 42 );
     EXPECT_EQ( m.getElement( 3, 1 ), 42 );
     EXPECT_EQ( m.getElement( 3, 2 ), 42 );
     EXPECT_EQ( m.getElement( 3, 3 ), 42 );
     EXPECT_EQ( m.getElement( 3, 4 ), 42 );
     EXPECT_EQ( m.getElement( 3, 5 ), 42 );
-    
+
     EXPECT_EQ( m.getElement( 4, 0 ), 42 );
     EXPECT_EQ( m.getElement( 4, 1 ), 42 );
     EXPECT_EQ( m.getElement( 4, 2 ), 42 );
     EXPECT_EQ( m.getElement( 4, 3 ), 42 );
     EXPECT_EQ( m.getElement( 4, 4 ), 42 );
     EXPECT_EQ( m.getElement( 4, 5 ), 42 );
-    
+
     EXPECT_EQ( m.getElement( 5, 0 ), 42 );
     EXPECT_EQ( m.getElement( 5, 1 ), 42 );
     EXPECT_EQ( m.getElement( 5, 2 ), 42 );
     EXPECT_EQ( m.getElement( 5, 3 ), 42 );
     EXPECT_EQ( m.getElement( 5, 4 ), 42 );
     EXPECT_EQ( m.getElement( 5, 5 ), 42 );
-    
+
     EXPECT_EQ( m.getElement( 6, 0 ), 42 );
     EXPECT_EQ( m.getElement( 6, 1 ), 42 );
     EXPECT_EQ( m.getElement( 6, 2 ), 42 );
@@ -397,40 +397,40 @@ void test_SetElement()
  */
     const IndexType rows = 5;
     const IndexType cols = 5;
-    
+
     Matrix m;
     m.reset();
-    m.setDimensions( rows, cols );    
-    
+    m.setDimensions( rows, cols );
+
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++ )
             m.setElement( i, j, value++ );
-    
+
     EXPECT_EQ( m.getElement( 0, 0 ),  1 );
     EXPECT_EQ( m.getElement( 0, 1 ),  2 );
     EXPECT_EQ( m.getElement( 0, 2 ),  3 );
     EXPECT_EQ( m.getElement( 0, 3 ),  4 );
     EXPECT_EQ( m.getElement( 0, 4 ),  5 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ),  6 );
     EXPECT_EQ( m.getElement( 1, 1 ),  7 );
     EXPECT_EQ( m.getElement( 1, 2 ),  8 );
     EXPECT_EQ( m.getElement( 1, 3 ),  9 );
     EXPECT_EQ( m.getElement( 1, 4 ), 10 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ), 11 );
     EXPECT_EQ( m.getElement( 2, 1 ), 12 );
     EXPECT_EQ( m.getElement( 2, 2 ), 13 );
     EXPECT_EQ( m.getElement( 2, 3 ), 14 );
     EXPECT_EQ( m.getElement( 2, 4 ), 15 );
-    
+
     EXPECT_EQ( m.getElement( 3, 0 ), 16 );
     EXPECT_EQ( m.getElement( 3, 1 ), 17 );
     EXPECT_EQ( m.getElement( 3, 2 ), 18 );
     EXPECT_EQ( m.getElement( 3, 3 ), 19 );
     EXPECT_EQ( m.getElement( 3, 4 ), 20 );
-    
+
     EXPECT_EQ( m.getElement( 4, 0 ), 21 );
     EXPECT_EQ( m.getElement( 4, 1 ), 22 );
     EXPECT_EQ( m.getElement( 4, 2 ), 23 );
@@ -456,53 +456,53 @@ void test_AddElement()
  */
     const IndexType rows = 6;
     const IndexType cols = 5;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++ )
             m.setElement( i, j, value++ );
-    
+
     // Check the added elements
     EXPECT_EQ( m.getElement( 0, 0 ),  1 );
     EXPECT_EQ( m.getElement( 0, 1 ),  2 );
     EXPECT_EQ( m.getElement( 0, 2 ),  3 );
     EXPECT_EQ( m.getElement( 0, 3 ),  4 );
     EXPECT_EQ( m.getElement( 0, 4 ),  5 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ),  6 );
     EXPECT_EQ( m.getElement( 1, 1 ),  7 );
     EXPECT_EQ( m.getElement( 1, 2 ),  8 );
     EXPECT_EQ( m.getElement( 1, 3 ),  9 );
     EXPECT_EQ( m.getElement( 1, 4 ), 10 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ), 11 );
     EXPECT_EQ( m.getElement( 2, 1 ), 12 );
     EXPECT_EQ( m.getElement( 2, 2 ), 13 );
     EXPECT_EQ( m.getElement( 2, 3 ), 14 );
     EXPECT_EQ( m.getElement( 2, 4 ), 15 );
-    
+
     EXPECT_EQ( m.getElement( 3, 0 ), 16 );
     EXPECT_EQ( m.getElement( 3, 1 ), 17 );
     EXPECT_EQ( m.getElement( 3, 2 ), 18 );
     EXPECT_EQ( m.getElement( 3, 3 ), 19 );
     EXPECT_EQ( m.getElement( 3, 4 ), 20 );
-    
+
     EXPECT_EQ( m.getElement( 4, 0 ), 21 );
     EXPECT_EQ( m.getElement( 4, 1 ), 22 );
     EXPECT_EQ( m.getElement( 4, 2 ), 23 );
     EXPECT_EQ( m.getElement( 4, 3 ), 24 );
     EXPECT_EQ( m.getElement( 4, 4 ), 25 );
-    
+
     EXPECT_EQ( m.getElement( 5, 0 ), 26 );
     EXPECT_EQ( m.getElement( 5, 1 ), 27 );
     EXPECT_EQ( m.getElement( 5, 2 ), 28 );
     EXPECT_EQ( m.getElement( 5, 3 ), 29 );
     EXPECT_EQ( m.getElement( 5, 4 ), 30 );
-    
+
     // Add new elements to the old elements with a multiplying factor applied to the old elements.
 /*
  * The following setup results in the following 6x5 dense matrix:
@@ -518,38 +518,38 @@ void test_AddElement()
     RealType multiplicator = 2;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++ )
-            m.addElement( i, j, newValue++, multiplicator );    
-    
+            m.addElement( i, j, newValue++, multiplicator );
+
     EXPECT_EQ( m.getElement( 0, 0 ),  3 );
     EXPECT_EQ( m.getElement( 0, 1 ),  6 );
     EXPECT_EQ( m.getElement( 0, 2 ),  9 );
     EXPECT_EQ( m.getElement( 0, 3 ), 12 );
     EXPECT_EQ( m.getElement( 0, 4 ), 15 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ), 18 );
     EXPECT_EQ( m.getElement( 1, 1 ), 21 );
     EXPECT_EQ( m.getElement( 1, 2 ), 24 );
     EXPECT_EQ( m.getElement( 1, 3 ), 27 );
     EXPECT_EQ( m.getElement( 1, 4 ), 30 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ), 33 );
     EXPECT_EQ( m.getElement( 2, 1 ), 36 );
     EXPECT_EQ( m.getElement( 2, 2 ), 39 );
     EXPECT_EQ( m.getElement( 2, 3 ), 42 );
     EXPECT_EQ( m.getElement( 2, 4 ), 45 );
-    
+
     EXPECT_EQ( m.getElement( 3, 0 ), 48 );
     EXPECT_EQ( m.getElement( 3, 1 ), 51 );
     EXPECT_EQ( m.getElement( 3, 2 ), 54 );
     EXPECT_EQ( m.getElement( 3, 3 ), 57 );
     EXPECT_EQ( m.getElement( 3, 4 ), 60 );
-    
+
     EXPECT_EQ( m.getElement( 4, 0 ), 63 );
     EXPECT_EQ( m.getElement( 4, 1 ), 66 );
     EXPECT_EQ( m.getElement( 4, 2 ), 69 );
     EXPECT_EQ( m.getElement( 4, 3 ), 72 );
     EXPECT_EQ( m.getElement( 4, 4 ), 75 );
-    
+
     EXPECT_EQ( m.getElement( 5, 0 ), 78 );
     EXPECT_EQ( m.getElement( 5, 1 ), 81 );
     EXPECT_EQ( m.getElement( 5, 2 ), 84 );
@@ -718,37 +718,37 @@ void test_AddRow()
    };
    TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f );
 
-    
+
     EXPECT_EQ( m.getElement( 0, 0 ),  11 );
     EXPECT_EQ( m.getElement( 0, 1 ),  11 );
     EXPECT_EQ( m.getElement( 0, 2 ),  11 );
     EXPECT_EQ( m.getElement( 0, 3 ),  11 );
     EXPECT_EQ( m.getElement( 0, 4 ),   0 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ),  28 );
     EXPECT_EQ( m.getElement( 1, 1 ),  29 );
     EXPECT_EQ( m.getElement( 1, 2 ),  30 );
     EXPECT_EQ( m.getElement( 1, 3 ),  31 );
     EXPECT_EQ( m.getElement( 1, 4 ),  10 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ),  55 );
     EXPECT_EQ( m.getElement( 2, 1 ),  57 );
     EXPECT_EQ( m.getElement( 2, 2 ),  59 );
     EXPECT_EQ( m.getElement( 2, 3 ),  61 );
     EXPECT_EQ( m.getElement( 2, 4 ),  30 );
-    
+
     EXPECT_EQ( m.getElement( 3, 0 ),  92 );
     EXPECT_EQ( m.getElement( 3, 1 ),  95 );
     EXPECT_EQ( m.getElement( 3, 2 ),  98 );
     EXPECT_EQ( m.getElement( 3, 3 ), 101 );
     EXPECT_EQ( m.getElement( 3, 4 ),  60 );
-    
+
     EXPECT_EQ( m.getElement( 4, 0 ), 139 );
     EXPECT_EQ( m.getElement( 4, 1 ), 143 );
     EXPECT_EQ( m.getElement( 4, 2 ), 147 );
     EXPECT_EQ( m.getElement( 4, 3 ), 151 );
     EXPECT_EQ( m.getElement( 4, 4 ), 100 );
-    
+
     EXPECT_EQ( m.getElement( 5, 0 ), 196 );
     EXPECT_EQ( m.getElement( 5, 1 ), 201 );
     EXPECT_EQ( m.getElement( 5, 2 ), 206 );
@@ -773,31 +773,31 @@ void test_VectorProduct()
  */
     const IndexType rows = 5;
     const IndexType cols = 4;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++)
             m.setElement( i, j, value++ );
 
     using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
-    
+
     VectorType inVector;
     inVector.setSize( 4 );
-    for( IndexType i = 0; i < inVector.getSize(); i++ )        
+    for( IndexType i = 0; i < inVector.getSize(); i++ )
         inVector.setElement( i, 2 );
 
-    VectorType outVector;  
+    VectorType outVector;
     outVector.setSize( 5 );
     for( IndexType j = 0; j < outVector.getSize(); j++ )
         outVector.setElement( j, 0 );
- 
-    
+
+
     m.vectorProduct( inVector, outVector);
-   
+
     EXPECT_EQ( outVector.getElement( 0 ),  20 );
     EXPECT_EQ( outVector.getElement( 1 ),  52 );
     EXPECT_EQ( outVector.getElement( 2 ),  84 );
@@ -822,16 +822,16 @@ void test_AddMatrix()
  */
     const IndexType rows = 5;
     const IndexType cols = 4;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++)
             m.setElement( i, j, value++ );
-    
+
 /*
  * Sets up the following 5x4 dense matrix:
  *
@@ -841,16 +841,16 @@ void test_AddMatrix()
  *    | 13 14 15 16 |
  *    \ 17 18 19 20 /
  */
-    
+
     Matrix m2;
     m2.reset();
     m2.setDimensions( rows, cols );
-    
+
     RealType newValue = 1;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++)
             m2.setElement( i, j, newValue++ );
-    
+
     /*
  * Sets up the following 5x4 dense matrix:
  *
@@ -860,63 +860,63 @@ void test_AddMatrix()
  *    | 13 14 15 16 |
  *    \ 17 18 19 20 /
  */
-    
+
     Matrix mResult;
     mResult.reset();
     mResult.setDimensions( rows, cols );
-    
+
     mResult = m;
-    
+
     RealType matrixMultiplicator = 2;
     RealType thisMatrixMultiplicator = 1;
-    
+
     mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator );
-    
+
     EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) );
     EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) );
     EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) );
     EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) );
-    
+
     EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) );
     EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) );
     EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) );
     EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) );
-    
+
     EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) );
     EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) );
     EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) );
     EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) );
-    
+
     EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) );
     EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) );
     EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) );
     EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) );
-    
+
     EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) );
     EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) );
     EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) );
     EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) );
-    
+
     EXPECT_EQ( mResult.getElement( 0, 0 ),  3 );
     EXPECT_EQ( mResult.getElement( 0, 1 ),  6 );
     EXPECT_EQ( mResult.getElement( 0, 2 ),  9 );
     EXPECT_EQ( mResult.getElement( 0, 3 ), 12 );
-    
+
     EXPECT_EQ( mResult.getElement( 1, 0 ), 15 );
     EXPECT_EQ( mResult.getElement( 1, 1 ), 18 );
     EXPECT_EQ( mResult.getElement( 1, 2 ), 21 );
     EXPECT_EQ( mResult.getElement( 1, 3 ), 24 );
-    
+
     EXPECT_EQ( mResult.getElement( 2, 0 ), 27 );
     EXPECT_EQ( mResult.getElement( 2, 1 ), 30 );
     EXPECT_EQ( mResult.getElement( 2, 2 ), 33 );
     EXPECT_EQ( mResult.getElement( 2, 3 ), 36 );
-    
+
     EXPECT_EQ( mResult.getElement( 3, 0 ), 39 );
     EXPECT_EQ( mResult.getElement( 3, 1 ), 42 );
     EXPECT_EQ( mResult.getElement( 3, 2 ), 45 );
     EXPECT_EQ( mResult.getElement( 3, 3 ), 48 );
-    
+
     EXPECT_EQ( mResult.getElement( 4, 0 ), 51 );
     EXPECT_EQ( mResult.getElement( 4, 1 ), 54 );
     EXPECT_EQ( mResult.getElement( 4, 2 ), 57 );
@@ -940,16 +940,16 @@ void test_GetMatrixProduct()
  */
     const IndexType leftRows = 5;
     const IndexType leftCols = 4;
-    
+
     Matrix leftMatrix;
     leftMatrix.reset();
     leftMatrix.setDimensions( leftRows, leftCols );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < leftRows; i++ )
         for( IndexType j = 0; j < leftCols; j++)
             leftMatrix.setElement( i, j, value++ );
-    
+
 /*
  * Sets up the following 4x5 dense matrix:
  *
@@ -960,16 +960,16 @@ void test_GetMatrixProduct()
  */
     const IndexType rightRows = 4;
     const IndexType rightCols = 5;
-    
+
     Matrix rightMatrix;
     rightMatrix.reset();
     rightMatrix.setDimensions( rightRows, rightCols );
-    
+
     RealType newValue = 1;
     for( IndexType i = 0; i < rightRows; i++ )
         for( IndexType j = 0; j < rightCols; j++)
             rightMatrix.setElement( i, j, newValue++ );
-    
+
 /*
  * Sets up the following 5x5 resulting dense matrix:
  *
@@ -979,48 +979,48 @@ void test_GetMatrixProduct()
  *    |  0  0  0  0 |
  *    \  0  0  0  0 /
  */
-    
+
     Matrix mResult;
     mResult.reset();
     mResult.setDimensions( leftRows, rightCols );
     mResult.setValue( 0 );
-    
+
     RealType leftMatrixMultiplicator = 1;
     RealType rightMatrixMultiplicator = 2;
-/*   
+/*
  *      /  1  2  3  4 \                            /  220  240  260  280  300 \
  *      |  5  6  7  8 |       /  1  2  3  4  5 \   |  492  544  596  648  700 |
  *  1 * |  9 10 11 12 | * 2 * |  6  7  8  9 10 | = |  764  848  932 1016 1100 |
  *      | 13 14 15 16 |       | 11 12 13 14 15 |   | 1036 1152 1268 1384 1500 |
  *      \ 17 18 19 20 /       \ 16 17 18 19 20 /   \ 1308 1456 1604 1752 1900 /
  */
-    
+
     mResult.getMatrixProduct( leftMatrix, rightMatrix, leftMatrixMultiplicator, rightMatrixMultiplicator );
-    
+
     EXPECT_EQ( mResult.getElement( 0, 0 ),  220 );
     EXPECT_EQ( mResult.getElement( 0, 1 ),  240 );
     EXPECT_EQ( mResult.getElement( 0, 2 ),  260 );
     EXPECT_EQ( mResult.getElement( 0, 3 ),  280 );
     EXPECT_EQ( mResult.getElement( 0, 4 ),  300 );
-    
+
     EXPECT_EQ( mResult.getElement( 1, 0 ),  492 );
     EXPECT_EQ( mResult.getElement( 1, 1 ),  544 );
     EXPECT_EQ( mResult.getElement( 1, 2 ),  596 );
     EXPECT_EQ( mResult.getElement( 1, 3 ),  648 );
     EXPECT_EQ( mResult.getElement( 1, 4 ),  700 );
-    
+
     EXPECT_EQ( mResult.getElement( 2, 0 ),  764 );
     EXPECT_EQ( mResult.getElement( 2, 1 ),  848 );
     EXPECT_EQ( mResult.getElement( 2, 2 ),  932 );
     EXPECT_EQ( mResult.getElement( 2, 3 ), 1016 );
     EXPECT_EQ( mResult.getElement( 2, 4 ), 1100 );
-    
+
     EXPECT_EQ( mResult.getElement( 3, 0 ), 1036 );
     EXPECT_EQ( mResult.getElement( 3, 1 ), 1152 );
     EXPECT_EQ( mResult.getElement( 3, 2 ), 1268 );
     EXPECT_EQ( mResult.getElement( 3, 3 ), 1384 );
     EXPECT_EQ( mResult.getElement( 3, 4 ), 1500 );
-    
+
     EXPECT_EQ( mResult.getElement( 4, 0 ), 1308 );
     EXPECT_EQ( mResult.getElement( 4, 1 ), 1456 );
     EXPECT_EQ( mResult.getElement( 4, 2 ), 1604 );
@@ -1054,36 +1054,36 @@ void test_GetTransposition()
             m.setElement( i, j, value++ );
 
     m.print( std::cout );
-    
+
 /*
  * Sets up the following 2x3 dense matrix:
  *
  *    /  0  0  0 \
  *    \  0  0  0 /
- */ 
+ */
     Matrix mTransposed;
     mTransposed.reset();
     mTransposed.setDimensions( cols, rows );
-    
+
     mTransposed.print( std::cout );
-    
+
     RealType matrixMultiplicator = 1;
-    
+
     mTransposed.getTransposition( m, matrixMultiplicator );
-    
+
     mTransposed.print( std::cout );
-    
+
 /*
  * Should result in the following 2x3 dense matrix:
  *
  *    /  1  3  5 \
  *    \  2  4  6 /
- */ 
-    
+ */
+
     EXPECT_EQ( mTransposed.getElement( 0, 0 ), 1 );
     EXPECT_EQ( mTransposed.getElement( 0, 1 ), 3 );
     EXPECT_EQ( mTransposed.getElement( 0, 2 ), 5 );
-    
+
     EXPECT_EQ( mTransposed.getElement( 1, 0 ), 2 );
     EXPECT_EQ( mTransposed.getElement( 1, 1 ), 4 );
     EXPECT_EQ( mTransposed.getElement( 1, 2 ), 6 );
@@ -1106,60 +1106,60 @@ void test_PerformSORIteration()
  */
     const IndexType rows = 4;
     const IndexType cols = 4;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
-    
+
     m.setElement( 0, 0, 4.0 );        // 0th row
     m.setElement( 0, 1, 1.0 );
     m.setElement( 0, 2, 1.0 );
     m.setElement( 0, 3, 1.0 );
-        
+
     m.setElement( 1, 0, 1.0 );        // 1st row
     m.setElement( 1, 1, 4.0 );
     m.setElement( 1, 2, 1.0 );
     m.setElement( 1, 3, 1.0 );
-        
+
     m.setElement( 2, 0, 1.0 );
     m.setElement( 2, 1, 1.0 );        // 2nd row
     m.setElement( 2, 2, 4.0 );
     m.setElement( 2, 3, 1.0 );
-        
+
     m.setElement( 3, 0, 1.0 );        // 3rd row
     m.setElement( 3, 1, 1.0 );
     m.setElement( 3, 2, 1.0 );
     m.setElement( 3, 3, 4.0 );
-    
+
     RealType bVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 };
     RealType xVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 };
-    
+
     IndexType row = 0;
     RealType omega = 1;
-    
+
     m.performSORIteration( bVector, row++, xVector, omega);
-    
+
     EXPECT_EQ( xVector[ 0 ], -0.5 );
     EXPECT_EQ( xVector[ 1 ],  1.0 );
     EXPECT_EQ( xVector[ 2 ],  1.0 );
     EXPECT_EQ( xVector[ 3 ],  1.0 );
-    
+
     m.performSORIteration( bVector, row++, xVector, omega);
-    
+
     EXPECT_EQ( xVector[ 0 ], -0.5 );
     EXPECT_EQ( xVector[ 1 ], -0.125 );
     EXPECT_EQ( xVector[ 2 ],  1.0 );
     EXPECT_EQ( xVector[ 3 ],  1.0 );
-    
+
     m.performSORIteration( bVector, row++, xVector, omega);
-    
+
     EXPECT_EQ( xVector[ 0 ], -0.5 );
     EXPECT_EQ( xVector[ 1 ], -0.125 );
     EXPECT_EQ( xVector[ 2 ],  0.15625 );
     EXPECT_EQ( xVector[ 3 ],  1.0 );
-    
+
     m.performSORIteration( bVector, row++, xVector, omega);
-    
+
     EXPECT_EQ( xVector[ 0 ], -0.5 );
     EXPECT_EQ( xVector[ 1 ], -0.125 );
     EXPECT_EQ( xVector[ 2 ], 0.15625 );
@@ -1230,59 +1230,59 @@ void test_SaveAndLoad()
  */
     const IndexType rows = 4;
     const IndexType cols = 4;
-    
+
     Matrix savedMatrix;
     savedMatrix.reset();
     savedMatrix.setDimensions( rows, cols );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++ )
             savedMatrix.setElement( i, j, value++ );
-        
+
     ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) );
-    
+
     Matrix loadedMatrix;
     loadedMatrix.reset();
     loadedMatrix.setDimensions( rows, cols );
-    
+
     ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
     EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
     EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
     EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
     EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
     EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
     EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
     EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
     EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
     EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
     EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
     EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
     EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
     EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
     EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
     EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  4 );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  5 );
     EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  6 );
     EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  7 );
     EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  8 );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  9 );
     EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 );
     EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 );
     EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 13 );
     EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 14 );
     EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 );
@@ -1306,33 +1306,33 @@ void test_Print()
  */
     const IndexType rows = 5;
     const IndexType cols = 4;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++)
         for( IndexType j = 0; j < cols; j++)
             m.setElement( i, j, value++ );
-    
+
     #include <sstream>
     std::stringstream printed;
     std::stringstream couted;
-    
+
     //change the underlying buffer and save the old buffer
-    auto old_buf = std::cout.rdbuf(printed.rdbuf()); 
+    auto old_buf = std::cout.rdbuf(printed.rdbuf());
 
     m.print( std::cout ); //all the std::cout goes to ss
 
     std::cout.rdbuf(old_buf); //reset
-    
+
     couted << "Row: 0 ->  Col:0->1	 Col:1->2	 Col:2->3	 Col:3->4\t\n"
               "Row: 1 ->  Col:0->5	 Col:1->6	 Col:2->7	 Col:3->8\t\n"
               "Row: 2 ->  Col:0->9	 Col:1->10	 Col:2->11	 Col:3->12\t\n"
               "Row: 3 ->  Col:0->13	 Col:1->14	 Col:2->15	 Col:3->16\t\n"
               "Row: 4 ->  Col:0->17	 Col:1->18	 Col:2->19	 Col:3->20\t\n";
-    
+
     EXPECT_EQ( printed.str(), couted.str() );
 }
 
@@ -1380,91 +1380,91 @@ TYPED_TEST_SUITE( MatrixTest, MatrixTypes );
 TYPED_TEST( MatrixTest, setDimensionsTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_SetDimensions< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, setLikeTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_SetLike< MatrixType, MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, getRowLengthTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_GetRowLength< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, getNumberOfMatrixElementsTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_GetNumberOfMatrixElements< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_GetNumberOfNonzeroMatrixElements< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, resetTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_Reset< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, setValueTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_SetValue< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, setElementTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_SetElement< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, addElementTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_AddElement< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, setRowTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_SetRow< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, addRowTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_AddRow< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, vectorProductTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_VectorProduct< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, addMatrixTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_AddMatrix< MatrixType >();
 }
 
@@ -1478,14 +1478,14 @@ TYPED_TEST( MatrixTest, assignmentOperatorTest )
 TYPED_TEST( MatrixTest, saveAndLoadTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_SaveAndLoad< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, printTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_Print< MatrixType >();
 }
 
@@ -1503,7 +1503,7 @@ TYPED_TEST( MatrixTest, printTest )
 //}
 //#endif
 
-TEST( DenseMatrixTest, Dense_getMatrixProductTest_Host )
+/*TEST( DenseMatrixTest, Dense_getMatrixProductTest_Host )
 {
     bool testRan = false;
     EXPECT_TRUE( testRan );
@@ -1607,6 +1607,7 @@ TEST( DenseMatrixTest, Dense_performSORIterationTest_Cuda )
     std::cout << "\n THIS IS NOT IMPLEMENTED FOR CUDA YET!!\n\n";
 }
 #endif
+ * */
 
 #endif // HAVE_GTEST
 
-- 
GitLab


From 55341d35b8e67f418868e15d780cd2f8f636fdcd Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 8 Jan 2020 17:06:01 +0100
Subject: [PATCH 073/179] Fixing serialization type of dense matrix.

---
 src/TNL/Matrices/Dense.h                      |  4 ++--
 src/TNL/Matrices/Dense.hpp                    | 20 +++++++++----------
 src/TNL/Matrices/DenseMatrixView.hpp          | 14 ++++++-------
 .../{Tridiagonal_impl.h => Tridiagonal.hpp}   |  0
 4 files changed, 19 insertions(+), 19 deletions(-)
 rename src/TNL/Matrices/{Tridiagonal_impl.h => Tridiagonal.hpp} (100%)

diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index 9c05297d1..778fd0bd4 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -56,7 +56,7 @@ class Dense : public Matrix< Real, Device, Index >
       Dense();
 
       Dense( const IndexType rows, const IndexType columns );
-      
+
       ViewType getView();
 
       ConstViewType getConstView() const;
@@ -179,7 +179,7 @@ class Dense : public Matrix< Real, Device, Index >
 
       template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ >
       bool operator!=( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const;
-      
+
       void save( const String& fileName ) const;
 
       void load( const String& fileName );
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 7a6c4becc..49e218c77 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -15,7 +15,7 @@
 #include <TNL/Exceptions/NotImplementedError.h>
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 template< typename Real,
           typename Device,
@@ -46,7 +46,7 @@ auto
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getView() -> ViewType
 {
-   return ViewType( this->getRows(), 
+   return ViewType( this->getRows(),
                     this->getColumns(),
                     this->getValues().getView(),
                     this->segments.getView() );
@@ -77,9 +77,9 @@ Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getSerializationType()
 {
    return String( "Matrices::Dense< " ) +
-          getType< RealType >() + ", " +
-          getType< Device >() + ", " +
-          getType< IndexType >() + " >";
+          TNL::getSerializationType< RealType >() + ", [any_device], " +
+          TNL::getSerializationType< IndexType >() +
+          ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >";
 }
 
 template< typename Real,
@@ -99,7 +99,7 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-void 
+void
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 setDimensions( const IndexType rows,
                const IndexType columns )
@@ -128,7 +128,7 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-void 
+void
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
 {
@@ -322,7 +322,7 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-Real 
+Real
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getElement( const IndexType row,
             const IndexType column ) const
@@ -447,7 +447,7 @@ template< typename Real,
           typename RealAllocator >
    template< typename InVector,
              typename OutVector >
-void 
+void
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 vectorProduct( const InVector& inVector, OutVector& outVector ) const
 {
@@ -857,7 +857,7 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getTranspositio
                     << "This matrix rows: " << this->getRows() << std::endl
                     << "That matrix columns: " << matrix.getColumns() << std::endl
                     << "That matrix rows: " << matrix.getRows() << std::endl );
- 
+
    if( std::is_same< Device, Devices::Host >::value )
    {
       const IndexType& rows = matrix.getRows();
diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp
index 08cfab843..48c0ccdc3 100644
--- a/src/TNL/Matrices/DenseMatrixView.hpp
+++ b/src/TNL/Matrices/DenseMatrixView.hpp
@@ -15,7 +15,7 @@
 #include <TNL/Exceptions/NotImplementedError.h>
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 template< typename Real,
           typename Device,
@@ -50,7 +50,7 @@ auto
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 getView() -> ViewType
 {
-   return ViewType( this->getRows(), 
+   return ViewType( this->getRows(),
                     this->getColumns(),
                     this->getValues().getView(),
                     this->columnIndexes.getView(),
@@ -82,9 +82,9 @@ DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 getSerializationType()
 {
    return String( "Matrices::Dense< " ) +
-          getType< RealType >() + ", " +
-          getType< Device >() + ", " +
-          getType< IndexType >() + " >";
+          TNL::getSerializationType< RealType >() + ", [any_device], " +
+          TNL::getSerializationType< IndexType >() +
+          ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >";
 }
 
 template< typename Real,
@@ -271,7 +271,7 @@ template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder >
-Real 
+Real
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 getElement( const IndexType row,
             const IndexType column ) const
@@ -783,7 +783,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( co
                     << "This matrix rows: " << this->getRows() << std::endl
                     << "That matrix columns: " << matrix.getColumns() << std::endl
                     << "That matrix rows: " << matrix.getRows() << std::endl );
- 
+
    if( std::is_same< Device, Devices::Host >::value )
    {
       const IndexType& rows = matrix.getRows();
diff --git a/src/TNL/Matrices/Tridiagonal_impl.h b/src/TNL/Matrices/Tridiagonal.hpp
similarity index 100%
rename from src/TNL/Matrices/Tridiagonal_impl.h
rename to src/TNL/Matrices/Tridiagonal.hpp
-- 
GitLab


From 2cc4680c7564f2f062781eef3d4775368b98b5d1 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 8 Jan 2020 17:38:17 +0100
Subject: [PATCH 074/179] Fixed Matrices::Dense::getSerializationType unit
 test.

---
 src/TNL/Matrices/Dense.hpp                    |    2 +-
 src/UnitTests/Matrices/DenseMatrixTest.h      |   31 +-
 .../Matrices/TridiagonalMatrixTest.cpp        |   11 +
 .../Matrices/TridiagonalMatrixTest.cu         |   11 +
 .../Matrices/TridiagonalMatrixTest.h          | 1614 +++++++++++++++++
 5 files changed, 1651 insertions(+), 18 deletions(-)
 create mode 100644 src/UnitTests/Matrices/TridiagonalMatrixTest.cpp
 create mode 100644 src/UnitTests/Matrices/TridiagonalMatrixTest.cu
 create mode 100644 src/UnitTests/Matrices/TridiagonalMatrixTest.h

diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 49e218c77..c4deeb6fa 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -78,7 +78,7 @@ getSerializationType()
 {
    return String( "Matrices::Dense< " ) +
           TNL::getSerializationType< RealType >() + ", [any_device], " +
-          TNL::getSerializationType< IndexType >() +
+          TNL::getSerializationType< IndexType >() + ", " +
           ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >";
 }
 
diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index 8c3132caf..183783ea3 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -31,24 +31,16 @@ static const char* TEST_FILE_NAME = "test_DenseMatrixTest.tnl";
 
 #include <gtest/gtest.h>
 
-template< typename MatrixHostFloat, typename MatrixHostInt >
-void host_test_GetType()
+void test_GetSerializationType()
 {
-    MatrixHostFloat mtrxHostFloat;
-    MatrixHostInt mtrxHostInt;
-
-    EXPECT_EQ( mtrxHostFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Host, int >" ) );
-    EXPECT_EQ( mtrxHostInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Host, int >" ) );
-}
-
-template< typename MatrixCudaFloat, typename MatrixCudaInt >
-void cuda_test_GetType()
-{
-    MatrixCudaFloat mtrxCudaFloat;
-    MatrixCudaInt mtrxCudaInt;
-
-    EXPECT_EQ( mtrxCudaFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Cuda, int >" ) );
-    EXPECT_EQ( mtrxCudaInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Cuda, int >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Dense< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Dense< float, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Dense< int,   TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Dense< int, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Dense< float, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Dense< float, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Dense< int,   TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Dense< int, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Dense< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Dense< float, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Dense< int,   TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Dense< int, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Dense< float, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Dense< float, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Dense< int,   TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Dense< int, [any_device], int, false, [any_allocator] >" ) );
 }
 
 template< typename Matrix >
@@ -1377,6 +1369,11 @@ using MatrixTypes = ::testing::Types
 
 TYPED_TEST_SUITE( MatrixTest, MatrixTypes );
 
+TYPED_TEST( MatrixTest, getSerializationType )
+{
+   test_GetSerializationType();
+}
+
 TYPED_TEST( MatrixTest, setDimensionsTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp b/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp
new file mode 100644
index 000000000..a56349360
--- /dev/null
+++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          DenseMatrixTest.cpp -  description
+                             -------------------
+    begin                : Nov 10, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "DenseMatrixTest.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.cu b/src/UnitTests/Matrices/TridiagonalMatrixTest.cu
new file mode 100644
index 000000000..11d45efdb
--- /dev/null
+++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          DenseMatrixTest.cu -  description
+                             -------------------
+    begin                : Nov 10, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "DenseMatrixTest.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
new file mode 100644
index 000000000..8c3132caf
--- /dev/null
+++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
@@ -0,0 +1,1614 @@
+/***************************************************************************
+                          DenseMatrixTest.h -  description
+                             -------------------
+    begin                : Nov 10, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Devices/Host.h>
+#include <TNL/Matrices/Matrix.h>
+#include <TNL/Matrices/Dense.h>
+#include <TNL/Containers/Array.h>
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Math.h>
+#include <iostream>
+
+using Dense_host_float = TNL::Matrices::Dense< float, TNL::Devices::Host, int >;
+using Dense_host_int = TNL::Matrices::Dense< int, TNL::Devices::Host, int >;
+
+using Dense_cuda_float = TNL::Matrices::Dense< float, TNL::Devices::Cuda, int >;
+using Dense_cuda_int = TNL::Matrices::Dense< int, TNL::Devices::Cuda, int >;
+
+static const char* TEST_FILE_NAME = "test_DenseMatrixTest.tnl";
+
+#ifdef HAVE_GTEST
+#include <type_traits>
+
+#include <gtest/gtest.h>
+
+template< typename MatrixHostFloat, typename MatrixHostInt >
+void host_test_GetType()
+{
+    MatrixHostFloat mtrxHostFloat;
+    MatrixHostInt mtrxHostInt;
+
+    EXPECT_EQ( mtrxHostFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Host, int >" ) );
+    EXPECT_EQ( mtrxHostInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Host, int >" ) );
+}
+
+template< typename MatrixCudaFloat, typename MatrixCudaInt >
+void cuda_test_GetType()
+{
+    MatrixCudaFloat mtrxCudaFloat;
+    MatrixCudaInt mtrxCudaInt;
+
+    EXPECT_EQ( mtrxCudaFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Cuda, int >" ) );
+    EXPECT_EQ( mtrxCudaInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Cuda, int >" ) );
+}
+
+template< typename Matrix >
+void test_SetDimensions()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+    const IndexType rows = 9;
+    const IndexType cols = 8;
+
+    Matrix m;
+    m.setDimensions( rows, cols );
+
+    EXPECT_EQ( m.getRows(), 9 );
+    EXPECT_EQ( m.getColumns(), 8 );
+}
+
+template< typename Matrix1, typename Matrix2 >
+void test_SetLike()
+{
+    using RealType = typename Matrix1::RealType;
+    using DeviceType = typename Matrix1::DeviceType;
+    using IndexType = typename Matrix1::IndexType;
+
+    const IndexType rows = 8;
+    const IndexType cols = 7;
+
+    Matrix1 m1;
+    m1.reset();
+    m1.setDimensions( rows + 1, cols + 2 );
+
+    Matrix2 m2;
+    m2.reset();
+    m2.setDimensions( rows, cols );
+
+    m1.setLike( m2 );
+
+    EXPECT_EQ( m1.getRows(), m2.getRows() );
+    EXPECT_EQ( m1.getColumns(), m2.getColumns() );
+}
+
+template< typename Matrix >
+void test_GetCompressedRowLengths()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   const IndexType rows = 10;
+   const IndexType cols = 11;
+
+    Matrix m( rows, cols );
+
+    // Insert values into the rows.
+    RealType value = 1;
+
+    for( IndexType i = 0; i < 3; i++ )      // 0th row
+        m.setElement( 0, i, value++ );
+
+    for( IndexType i = 0; i < 3; i++ )      // 1st row
+        m.setElement( 1, i, value++ );
+
+    for( IndexType i = 0; i < 1; i++ )      // 2nd row
+        m.setElement( 2, i, value++ );
+
+    for( IndexType i = 0; i < 2; i++ )      // 3rd row
+        m.setElement( 3, i, value++ );
+
+    for( IndexType i = 0; i < 3; i++ )      // 4th row
+        m.setElement( 4, i, value++ );
+
+    for( IndexType i = 0; i < 4; i++ )      // 5th row
+        m.setElement( 5, i, value++ );
+
+    for( IndexType i = 0; i < 5; i++ )      // 6th row
+        m.setElement( 6, i, value++ );
+
+    for( IndexType i = 0; i < 6; i++ )      // 7th row
+        m.setElement( 7, i, value++ );
+
+    for( IndexType i = 0; i < 7; i++ )      // 8th row
+        m.setElement( 8, i, value++ );
+
+    for( IndexType i = 0; i < 8; i++ )      // 9th row
+        m.setElement( 9, i, value++ );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths = 0;
+   m.getCompressedRowLengths( rowLengths );
+   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 };
+   EXPECT_EQ( rowLengths, correctRowLengths );
+}
+
+template< typename Matrix >
+void test_GetRowLength()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+    const IndexType rows = 8;
+    const IndexType cols = 7;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    EXPECT_EQ( m.getRowLength( 0 ), 7 );
+    EXPECT_EQ( m.getRowLength( 1 ), 7 );
+    EXPECT_EQ( m.getRowLength( 2 ), 7 );
+    EXPECT_EQ( m.getRowLength( 3 ), 7 );
+    EXPECT_EQ( m.getRowLength( 4 ), 7 );
+    EXPECT_EQ( m.getRowLength( 5 ), 7 );
+    EXPECT_EQ( m.getRowLength( 6 ), 7 );
+    EXPECT_EQ( m.getRowLength( 7 ), 7 );
+}
+
+template< typename Matrix >
+void test_GetNumberOfMatrixElements()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+    const IndexType rows = 7;
+    const IndexType cols = 6;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    EXPECT_EQ( m.getNumberOfMatrixElements(), 42 );
+}
+
+template< typename Matrix >
+void test_GetNumberOfNonzeroMatrixElements()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+/*
+ * Sets up the following 7x6 dense matrix:
+ *
+ *    /  0  2  3  4  5  6 \
+ *    |  7  8  9 10 11 12 |
+ *    | 13 14 15 16 17 18 |
+ *    | 19 20 21 22 23 24 |
+ *    | 25 26 27 28 29 30 |
+ *    | 31 32 33 34 35 36 |
+ *    \ 37 38 39 40 41  0 /
+ */
+    const IndexType rows = 7;
+    const IndexType cols = 6;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++ )
+            m.setElement( i, j, value++ );
+
+    m.setElement( 0, 0, 0); // Set the first element of the diagonal to 0.
+    m.setElement( 6, 5, 0); // Set the last element of the diagonal to 0.
+
+    EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 40 );
+}
+
+template< typename Matrix >
+void test_Reset()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+/*
+ * Sets up the following 5x4 dense matrix:
+ *
+ *    /  0  0  0  0 \
+ *    |  0  0  0  0 |
+ *    |  0  0  0  0 |
+ *    |  0  0  0  0 |
+ *    \  0  0  0  0 /
+ */
+    const IndexType rows = 5;
+    const IndexType cols = 4;
+
+    Matrix m;
+    m.setDimensions( rows, cols );
+
+    m.reset();
+
+    EXPECT_EQ( m.getRows(), 0 );
+    EXPECT_EQ( m.getColumns(), 0 );
+}
+
+template< typename Matrix >
+void test_SetValue()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 7x6 dense matrix:
+ *
+ *    /  1  2  3  4  5  6 \
+ *    |  7  8  9 10 11 12 |
+ *    | 13 14 15 16 17 18 |
+ *    | 19 20 21 22 23 24 |
+ *    | 25 26 27 28 29 30 |
+ *    | 31 32 33 34 35 36 |
+ *    \ 37 38 39 40 41 42 /
+ */
+    const IndexType rows = 7;
+    const IndexType cols = 6;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++ )
+            m.setElement( i, j, value++ );
+
+    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  3 );
+    EXPECT_EQ( m.getElement( 0, 3 ),  4 );
+    EXPECT_EQ( m.getElement( 0, 4 ),  5 );
+    EXPECT_EQ( m.getElement( 0, 5 ),  6 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ),  7 );
+    EXPECT_EQ( m.getElement( 1, 1 ),  8 );
+    EXPECT_EQ( m.getElement( 1, 2 ),  9 );
+    EXPECT_EQ( m.getElement( 1, 3 ), 10 );
+    EXPECT_EQ( m.getElement( 1, 4 ), 11 );
+    EXPECT_EQ( m.getElement( 1, 5 ), 12 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ), 13 );
+    EXPECT_EQ( m.getElement( 2, 1 ), 14 );
+    EXPECT_EQ( m.getElement( 2, 2 ), 15 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 16 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 17 );
+    EXPECT_EQ( m.getElement( 2, 5 ), 18 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 19 );
+    EXPECT_EQ( m.getElement( 3, 1 ), 20 );
+    EXPECT_EQ( m.getElement( 3, 2 ), 21 );
+    EXPECT_EQ( m.getElement( 3, 3 ), 22 );
+    EXPECT_EQ( m.getElement( 3, 4 ), 23 );
+    EXPECT_EQ( m.getElement( 3, 5 ), 24 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 25 );
+    EXPECT_EQ( m.getElement( 4, 1 ), 26 );
+    EXPECT_EQ( m.getElement( 4, 2 ), 27 );
+    EXPECT_EQ( m.getElement( 4, 3 ), 28 );
+    EXPECT_EQ( m.getElement( 4, 4 ), 29 );
+    EXPECT_EQ( m.getElement( 4, 5 ), 30 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ), 31 );
+    EXPECT_EQ( m.getElement( 5, 1 ), 32 );
+    EXPECT_EQ( m.getElement( 5, 2 ), 33 );
+    EXPECT_EQ( m.getElement( 5, 3 ), 34 );
+    EXPECT_EQ( m.getElement( 5, 4 ), 35 );
+    EXPECT_EQ( m.getElement( 5, 5 ), 36 );
+
+    EXPECT_EQ( m.getElement( 6, 0 ), 37 );
+    EXPECT_EQ( m.getElement( 6, 1 ), 38 );
+    EXPECT_EQ( m.getElement( 6, 2 ), 39 );
+    EXPECT_EQ( m.getElement( 6, 3 ), 40 );
+    EXPECT_EQ( m.getElement( 6, 4 ), 41 );
+    EXPECT_EQ( m.getElement( 6, 5 ), 42 );
+
+    // Set the values of all elements to a certain number
+    m.setValue( 42 );
+
+    EXPECT_EQ( m.getElement( 0, 0 ), 42 );
+    EXPECT_EQ( m.getElement( 0, 1 ), 42 );
+    EXPECT_EQ( m.getElement( 0, 2 ), 42 );
+    EXPECT_EQ( m.getElement( 0, 3 ), 42 );
+    EXPECT_EQ( m.getElement( 0, 4 ), 42 );
+    EXPECT_EQ( m.getElement( 0, 5 ), 42 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ), 42 );
+    EXPECT_EQ( m.getElement( 1, 1 ), 42 );
+    EXPECT_EQ( m.getElement( 1, 2 ), 42 );
+    EXPECT_EQ( m.getElement( 1, 3 ), 42 );
+    EXPECT_EQ( m.getElement( 1, 4 ), 42 );
+    EXPECT_EQ( m.getElement( 1, 5 ), 42 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ), 42 );
+    EXPECT_EQ( m.getElement( 2, 1 ), 42 );
+    EXPECT_EQ( m.getElement( 2, 2 ), 42 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 42 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 42 );
+    EXPECT_EQ( m.getElement( 2, 5 ), 42 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 42 );
+    EXPECT_EQ( m.getElement( 3, 1 ), 42 );
+    EXPECT_EQ( m.getElement( 3, 2 ), 42 );
+    EXPECT_EQ( m.getElement( 3, 3 ), 42 );
+    EXPECT_EQ( m.getElement( 3, 4 ), 42 );
+    EXPECT_EQ( m.getElement( 3, 5 ), 42 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 42 );
+    EXPECT_EQ( m.getElement( 4, 1 ), 42 );
+    EXPECT_EQ( m.getElement( 4, 2 ), 42 );
+    EXPECT_EQ( m.getElement( 4, 3 ), 42 );
+    EXPECT_EQ( m.getElement( 4, 4 ), 42 );
+    EXPECT_EQ( m.getElement( 4, 5 ), 42 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ), 42 );
+    EXPECT_EQ( m.getElement( 5, 1 ), 42 );
+    EXPECT_EQ( m.getElement( 5, 2 ), 42 );
+    EXPECT_EQ( m.getElement( 5, 3 ), 42 );
+    EXPECT_EQ( m.getElement( 5, 4 ), 42 );
+    EXPECT_EQ( m.getElement( 5, 5 ), 42 );
+
+    EXPECT_EQ( m.getElement( 6, 0 ), 42 );
+    EXPECT_EQ( m.getElement( 6, 1 ), 42 );
+    EXPECT_EQ( m.getElement( 6, 2 ), 42 );
+    EXPECT_EQ( m.getElement( 6, 3 ), 42 );
+    EXPECT_EQ( m.getElement( 6, 4 ), 42 );
+    EXPECT_EQ( m.getElement( 6, 5 ), 42 );
+}
+
+template< typename Matrix >
+void test_SetElement()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 5x5 dense matrix:
+ *
+ *    /  1  2  3  4  5 \
+ *    |  6  7  8  9 10 |
+ *    | 11 12 13 14 15 |
+ *    | 16 17 18 19 20 |
+ *    \ 21 22 23 24 25 /
+ */
+    const IndexType rows = 5;
+    const IndexType cols = 5;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++ )
+            m.setElement( i, j, value++ );
+
+    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  3 );
+    EXPECT_EQ( m.getElement( 0, 3 ),  4 );
+    EXPECT_EQ( m.getElement( 0, 4 ),  5 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+    EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+    EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+    EXPECT_EQ( m.getElement( 1, 3 ),  9 );
+    EXPECT_EQ( m.getElement( 1, 4 ), 10 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ), 11 );
+    EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+    EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 15 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+    EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+    EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+    EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+    EXPECT_EQ( m.getElement( 3, 4 ), 20 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 21 );
+    EXPECT_EQ( m.getElement( 4, 1 ), 22 );
+    EXPECT_EQ( m.getElement( 4, 2 ), 23 );
+    EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+    EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+}
+
+template< typename Matrix >
+void test_AddElement()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 6x5 dense matrix:
+ *
+ *    /  1  2  3  4  5 \
+ *    |  6  7  8  9 10 |
+ *    | 11 12 13 14 15 |
+ *    | 16 17 18 19 20 |
+ *    | 21 22 23 24 25 |
+ *    \ 26 27 28 29 30 /
+ */
+    const IndexType rows = 6;
+    const IndexType cols = 5;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++ )
+            m.setElement( i, j, value++ );
+
+    // Check the added elements
+    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  3 );
+    EXPECT_EQ( m.getElement( 0, 3 ),  4 );
+    EXPECT_EQ( m.getElement( 0, 4 ),  5 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+    EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+    EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+    EXPECT_EQ( m.getElement( 1, 3 ),  9 );
+    EXPECT_EQ( m.getElement( 1, 4 ), 10 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ), 11 );
+    EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+    EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 15 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+    EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+    EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+    EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+    EXPECT_EQ( m.getElement( 3, 4 ), 20 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 21 );
+    EXPECT_EQ( m.getElement( 4, 1 ), 22 );
+    EXPECT_EQ( m.getElement( 4, 2 ), 23 );
+    EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+    EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ), 26 );
+    EXPECT_EQ( m.getElement( 5, 1 ), 27 );
+    EXPECT_EQ( m.getElement( 5, 2 ), 28 );
+    EXPECT_EQ( m.getElement( 5, 3 ), 29 );
+    EXPECT_EQ( m.getElement( 5, 4 ), 30 );
+
+    // Add new elements to the old elements with a multiplying factor applied to the old elements.
+/*
+ * The following setup results in the following 6x5 dense matrix:
+ *
+ *    /  3  6  9 12 15 \
+ *    | 18 21 24 27 30 |
+ *    | 33 36 39 42 45 |
+ *    | 48 51 54 57 60 |
+ *    | 63 66 69 72 75 |
+ *    \ 78 81 84 87 90 /
+ */
+    RealType newValue = 1;
+    RealType multiplicator = 2;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++ )
+            m.addElement( i, j, newValue++, multiplicator );
+
+    EXPECT_EQ( m.getElement( 0, 0 ),  3 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  6 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  9 );
+    EXPECT_EQ( m.getElement( 0, 3 ), 12 );
+    EXPECT_EQ( m.getElement( 0, 4 ), 15 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ), 18 );
+    EXPECT_EQ( m.getElement( 1, 1 ), 21 );
+    EXPECT_EQ( m.getElement( 1, 2 ), 24 );
+    EXPECT_EQ( m.getElement( 1, 3 ), 27 );
+    EXPECT_EQ( m.getElement( 1, 4 ), 30 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ), 33 );
+    EXPECT_EQ( m.getElement( 2, 1 ), 36 );
+    EXPECT_EQ( m.getElement( 2, 2 ), 39 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 42 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 45 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 48 );
+    EXPECT_EQ( m.getElement( 3, 1 ), 51 );
+    EXPECT_EQ( m.getElement( 3, 2 ), 54 );
+    EXPECT_EQ( m.getElement( 3, 3 ), 57 );
+    EXPECT_EQ( m.getElement( 3, 4 ), 60 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 63 );
+    EXPECT_EQ( m.getElement( 4, 1 ), 66 );
+    EXPECT_EQ( m.getElement( 4, 2 ), 69 );
+    EXPECT_EQ( m.getElement( 4, 3 ), 72 );
+    EXPECT_EQ( m.getElement( 4, 4 ), 75 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ), 78 );
+    EXPECT_EQ( m.getElement( 5, 1 ), 81 );
+    EXPECT_EQ( m.getElement( 5, 2 ), 84 );
+    EXPECT_EQ( m.getElement( 5, 3 ), 87 );
+    EXPECT_EQ( m.getElement( 5, 4 ), 90 );
+}
+
+template< typename Matrix >
+void test_SetRow()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 3x7 dense matrix:
+    *
+    *    /  1  2  3  4  5  6  7 \
+    *    |  8  9 10 11 12 13 14 |
+    *    \ 15 16 17 18 19 20 21 /
+    */
+   const IndexType rows = 3;
+   const IndexType cols = 7;
+
+   Matrix m;
+   m.reset();
+   m.setDimensions( rows, cols );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+         m.setElement( i, j, value++ );
+
+   auto matrix_view = m.getView();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      RealType values[ 3 ][ 5 ] {
+         { 11, 11, 11, 11, 11 },
+         { 22, 22, 22, 22, 22 },
+         { 33, 33, 33, 33, 33 } };
+      IndexType columnIndexes[ 3 ][ 5 ] {
+         { 0, 1, 2, 3, 4 },
+         { 0, 1, 2, 3, 4 },
+         { 2, 3, 4, 5, 6 } };
+      auto row = matrix_view.getRow( rowIdx );
+      for( IndexType i = 0; i < 5; i++ )
+        row.setElement( columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] );
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f );
+
+   EXPECT_EQ( m.getElement( 0, 0 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 2 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 3 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 4 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  6 );
+   EXPECT_EQ( m.getElement( 0, 6 ),  7 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 5 ), 13 );
+   EXPECT_EQ( m.getElement( 1, 6 ), 14 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 15 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 16 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 33 );
+}
+
+template< typename Matrix >
+void test_AddRow()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   /*
+    * Sets up the following 6x5 dense matrix:
+    *
+    *    /  1  2  3  4  5 \
+    *    |  6  7  8  9 10 |
+    *    | 11 12 13 14 15 |
+    *    | 16 17 18 19 20 |
+    *    | 21 22 23 24 25 |
+    *    \ 26 27 28 29 30 /
+    */
+
+   const IndexType rows = 6;
+   const IndexType cols = 5;
+
+   Matrix m( rows, cols );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+         m.setElement( i, j, value++ );
+
+   // Check the added elements
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  4 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  5 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  9 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 10 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 11 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 15 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 20 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 21 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 22 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 23 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 26 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 27 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 28 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 29 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 30 );
+
+   // Add new elements to the old elements with a multiplying factor applied to the old elements.
+   /*
+    * The following setup results in the following 6x5 sparse matrix:
+    *
+    *    /  3  6  9 12 15 \
+    *    | 18 21 24 27 30 |
+    *    | 33 36 39 42 45 |
+    *    | 48 51 54 57 60 |
+    *    | 63 66 69 72 75 |
+    *    \ 78 81 84 87 90 /
+    */
+
+   auto matrix_view = m.getView();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      RealType values[ 6 ][ 5 ] {
+         { 11, 11, 11, 11, 0 },
+         { 22, 22, 22, 22, 0 },
+         { 33, 33, 33, 33, 0 },
+         { 44, 44, 44, 44, 0 },
+         { 55, 55, 55, 55, 0 },
+         { 66, 66, 66, 66, 0 } };
+      auto row = matrix_view.getRow( rowIdx );
+      for( IndexType i = 0; i < 5; i++ )
+      {
+         RealType& val = row.getValue( i );
+         val = rowIdx * val + values[ rowIdx ][ i ];
+      }
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f );
+
+
+    EXPECT_EQ( m.getElement( 0, 0 ),  11 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  11 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  11 );
+    EXPECT_EQ( m.getElement( 0, 3 ),  11 );
+    EXPECT_EQ( m.getElement( 0, 4 ),   0 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ),  28 );
+    EXPECT_EQ( m.getElement( 1, 1 ),  29 );
+    EXPECT_EQ( m.getElement( 1, 2 ),  30 );
+    EXPECT_EQ( m.getElement( 1, 3 ),  31 );
+    EXPECT_EQ( m.getElement( 1, 4 ),  10 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ),  55 );
+    EXPECT_EQ( m.getElement( 2, 1 ),  57 );
+    EXPECT_EQ( m.getElement( 2, 2 ),  59 );
+    EXPECT_EQ( m.getElement( 2, 3 ),  61 );
+    EXPECT_EQ( m.getElement( 2, 4 ),  30 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ),  92 );
+    EXPECT_EQ( m.getElement( 3, 1 ),  95 );
+    EXPECT_EQ( m.getElement( 3, 2 ),  98 );
+    EXPECT_EQ( m.getElement( 3, 3 ), 101 );
+    EXPECT_EQ( m.getElement( 3, 4 ),  60 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 139 );
+    EXPECT_EQ( m.getElement( 4, 1 ), 143 );
+    EXPECT_EQ( m.getElement( 4, 2 ), 147 );
+    EXPECT_EQ( m.getElement( 4, 3 ), 151 );
+    EXPECT_EQ( m.getElement( 4, 4 ), 100 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ), 196 );
+    EXPECT_EQ( m.getElement( 5, 1 ), 201 );
+    EXPECT_EQ( m.getElement( 5, 2 ), 206 );
+    EXPECT_EQ( m.getElement( 5, 3 ), 211 );
+    EXPECT_EQ( m.getElement( 5, 4 ), 150 );
+}
+
+template< typename Matrix >
+void test_VectorProduct()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 5x4 dense matrix:
+ *
+ *    /  1  2  3  4 \
+ *    |  5  6  7  8 |
+ *    |  9 10 11 12 |
+ *    | 13 14 15 16 |
+ *    \ 17 18 19 20 /
+ */
+    const IndexType rows = 5;
+    const IndexType cols = 4;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++)
+            m.setElement( i, j, value++ );
+
+    using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
+
+    VectorType inVector;
+    inVector.setSize( 4 );
+    for( IndexType i = 0; i < inVector.getSize(); i++ )
+        inVector.setElement( i, 2 );
+
+    VectorType outVector;
+    outVector.setSize( 5 );
+    for( IndexType j = 0; j < outVector.getSize(); j++ )
+        outVector.setElement( j, 0 );
+
+
+    m.vectorProduct( inVector, outVector);
+
+    EXPECT_EQ( outVector.getElement( 0 ),  20 );
+    EXPECT_EQ( outVector.getElement( 1 ),  52 );
+    EXPECT_EQ( outVector.getElement( 2 ),  84 );
+    EXPECT_EQ( outVector.getElement( 3 ), 116 );
+    EXPECT_EQ( outVector.getElement( 4 ), 148 );
+}
+
+template< typename Matrix >
+void test_AddMatrix()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 5x4 dense matrix:
+ *
+ *    /  1  2  3  4 \
+ *    |  5  6  7  8 |
+ *    |  9 10 11 12 |
+ *    | 13 14 15 16 |
+ *    \ 17 18 19 20 /
+ */
+    const IndexType rows = 5;
+    const IndexType cols = 4;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++)
+            m.setElement( i, j, value++ );
+
+/*
+ * Sets up the following 5x4 dense matrix:
+ *
+ *    /  1  2  3  4 \
+ *    |  5  6  7  8 |
+ *    |  9 10 11 12 |
+ *    | 13 14 15 16 |
+ *    \ 17 18 19 20 /
+ */
+
+    Matrix m2;
+    m2.reset();
+    m2.setDimensions( rows, cols );
+
+    RealType newValue = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++)
+            m2.setElement( i, j, newValue++ );
+
+    /*
+ * Sets up the following 5x4 dense matrix:
+ *
+ *    /  1  2  3  4 \
+ *    |  5  6  7  8 |
+ *    |  9 10 11 12 |
+ *    | 13 14 15 16 |
+ *    \ 17 18 19 20 /
+ */
+
+    Matrix mResult;
+    mResult.reset();
+    mResult.setDimensions( rows, cols );
+
+    mResult = m;
+
+    RealType matrixMultiplicator = 2;
+    RealType thisMatrixMultiplicator = 1;
+
+    mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator );
+
+    EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) );
+    EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) );
+    EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) );
+    EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) );
+
+    EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) );
+    EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) );
+    EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) );
+    EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) );
+
+    EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) );
+    EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) );
+    EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) );
+    EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) );
+
+    EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) );
+    EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) );
+    EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) );
+    EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) );
+
+    EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) );
+    EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) );
+    EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) );
+    EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) );
+
+    EXPECT_EQ( mResult.getElement( 0, 0 ),  3 );
+    EXPECT_EQ( mResult.getElement( 0, 1 ),  6 );
+    EXPECT_EQ( mResult.getElement( 0, 2 ),  9 );
+    EXPECT_EQ( mResult.getElement( 0, 3 ), 12 );
+
+    EXPECT_EQ( mResult.getElement( 1, 0 ), 15 );
+    EXPECT_EQ( mResult.getElement( 1, 1 ), 18 );
+    EXPECT_EQ( mResult.getElement( 1, 2 ), 21 );
+    EXPECT_EQ( mResult.getElement( 1, 3 ), 24 );
+
+    EXPECT_EQ( mResult.getElement( 2, 0 ), 27 );
+    EXPECT_EQ( mResult.getElement( 2, 1 ), 30 );
+    EXPECT_EQ( mResult.getElement( 2, 2 ), 33 );
+    EXPECT_EQ( mResult.getElement( 2, 3 ), 36 );
+
+    EXPECT_EQ( mResult.getElement( 3, 0 ), 39 );
+    EXPECT_EQ( mResult.getElement( 3, 1 ), 42 );
+    EXPECT_EQ( mResult.getElement( 3, 2 ), 45 );
+    EXPECT_EQ( mResult.getElement( 3, 3 ), 48 );
+
+    EXPECT_EQ( mResult.getElement( 4, 0 ), 51 );
+    EXPECT_EQ( mResult.getElement( 4, 1 ), 54 );
+    EXPECT_EQ( mResult.getElement( 4, 2 ), 57 );
+    EXPECT_EQ( mResult.getElement( 4, 3 ), 60 );
+}
+
+template< typename Matrix >
+void test_GetMatrixProduct()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 5x4 dense matrix:
+ *
+ *    /  1  2  3  4 \
+ *    |  5  6  7  8 |
+ *    |  9 10 11 12 |
+ *    | 13 14 15 16 |
+ *    \ 17 18 19 20 /
+ */
+    const IndexType leftRows = 5;
+    const IndexType leftCols = 4;
+
+    Matrix leftMatrix;
+    leftMatrix.reset();
+    leftMatrix.setDimensions( leftRows, leftCols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < leftRows; i++ )
+        for( IndexType j = 0; j < leftCols; j++)
+            leftMatrix.setElement( i, j, value++ );
+
+/*
+ * Sets up the following 4x5 dense matrix:
+ *
+ *    /  1  2  3  4  5 \
+ *    |  6  7  8  9 10 |
+ *    | 11 12 13 14 15 |
+ *    \ 16 17 18 19 20 /
+ */
+    const IndexType rightRows = 4;
+    const IndexType rightCols = 5;
+
+    Matrix rightMatrix;
+    rightMatrix.reset();
+    rightMatrix.setDimensions( rightRows, rightCols );
+
+    RealType newValue = 1;
+    for( IndexType i = 0; i < rightRows; i++ )
+        for( IndexType j = 0; j < rightCols; j++)
+            rightMatrix.setElement( i, j, newValue++ );
+
+/*
+ * Sets up the following 5x5 resulting dense matrix:
+ *
+ *    /  0  0  0  0 \
+ *    |  0  0  0  0 |
+ *    |  0  0  0  0 |
+ *    |  0  0  0  0 |
+ *    \  0  0  0  0 /
+ */
+
+    Matrix mResult;
+    mResult.reset();
+    mResult.setDimensions( leftRows, rightCols );
+    mResult.setValue( 0 );
+
+    RealType leftMatrixMultiplicator = 1;
+    RealType rightMatrixMultiplicator = 2;
+/*
+ *      /  1  2  3  4 \                            /  220  240  260  280  300 \
+ *      |  5  6  7  8 |       /  1  2  3  4  5 \   |  492  544  596  648  700 |
+ *  1 * |  9 10 11 12 | * 2 * |  6  7  8  9 10 | = |  764  848  932 1016 1100 |
+ *      | 13 14 15 16 |       | 11 12 13 14 15 |   | 1036 1152 1268 1384 1500 |
+ *      \ 17 18 19 20 /       \ 16 17 18 19 20 /   \ 1308 1456 1604 1752 1900 /
+ */
+
+    mResult.getMatrixProduct( leftMatrix, rightMatrix, leftMatrixMultiplicator, rightMatrixMultiplicator );
+
+    EXPECT_EQ( mResult.getElement( 0, 0 ),  220 );
+    EXPECT_EQ( mResult.getElement( 0, 1 ),  240 );
+    EXPECT_EQ( mResult.getElement( 0, 2 ),  260 );
+    EXPECT_EQ( mResult.getElement( 0, 3 ),  280 );
+    EXPECT_EQ( mResult.getElement( 0, 4 ),  300 );
+
+    EXPECT_EQ( mResult.getElement( 1, 0 ),  492 );
+    EXPECT_EQ( mResult.getElement( 1, 1 ),  544 );
+    EXPECT_EQ( mResult.getElement( 1, 2 ),  596 );
+    EXPECT_EQ( mResult.getElement( 1, 3 ),  648 );
+    EXPECT_EQ( mResult.getElement( 1, 4 ),  700 );
+
+    EXPECT_EQ( mResult.getElement( 2, 0 ),  764 );
+    EXPECT_EQ( mResult.getElement( 2, 1 ),  848 );
+    EXPECT_EQ( mResult.getElement( 2, 2 ),  932 );
+    EXPECT_EQ( mResult.getElement( 2, 3 ), 1016 );
+    EXPECT_EQ( mResult.getElement( 2, 4 ), 1100 );
+
+    EXPECT_EQ( mResult.getElement( 3, 0 ), 1036 );
+    EXPECT_EQ( mResult.getElement( 3, 1 ), 1152 );
+    EXPECT_EQ( mResult.getElement( 3, 2 ), 1268 );
+    EXPECT_EQ( mResult.getElement( 3, 3 ), 1384 );
+    EXPECT_EQ( mResult.getElement( 3, 4 ), 1500 );
+
+    EXPECT_EQ( mResult.getElement( 4, 0 ), 1308 );
+    EXPECT_EQ( mResult.getElement( 4, 1 ), 1456 );
+    EXPECT_EQ( mResult.getElement( 4, 2 ), 1604 );
+    EXPECT_EQ( mResult.getElement( 4, 3 ), 1752 );
+    EXPECT_EQ( mResult.getElement( 4, 4 ), 1900 );
+}
+
+template< typename Matrix >
+void test_GetTransposition()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 3x2 dense matrix:
+ *
+ *    /  1  2 \
+ *    |  3  4 |
+ *    \  5  6 /
+ */
+    const IndexType rows = 3;
+    const IndexType cols = 2;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++ )
+            m.setElement( i, j, value++ );
+
+    m.print( std::cout );
+
+/*
+ * Sets up the following 2x3 dense matrix:
+ *
+ *    /  0  0  0 \
+ *    \  0  0  0 /
+ */
+    Matrix mTransposed;
+    mTransposed.reset();
+    mTransposed.setDimensions( cols, rows );
+
+    mTransposed.print( std::cout );
+
+    RealType matrixMultiplicator = 1;
+
+    mTransposed.getTransposition( m, matrixMultiplicator );
+
+    mTransposed.print( std::cout );
+
+/*
+ * Should result in the following 2x3 dense matrix:
+ *
+ *    /  1  3  5 \
+ *    \  2  4  6 /
+ */
+
+    EXPECT_EQ( mTransposed.getElement( 0, 0 ), 1 );
+    EXPECT_EQ( mTransposed.getElement( 0, 1 ), 3 );
+    EXPECT_EQ( mTransposed.getElement( 0, 2 ), 5 );
+
+    EXPECT_EQ( mTransposed.getElement( 1, 0 ), 2 );
+    EXPECT_EQ( mTransposed.getElement( 1, 1 ), 4 );
+    EXPECT_EQ( mTransposed.getElement( 1, 2 ), 6 );
+}
+
+
+template< typename Matrix >
+void test_PerformSORIteration()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 4x4 dense matrix:
+ *
+ *    /  4  1  1  1 \
+ *    |  1  4  1  1 |
+ *    |  1  1  4  1 |
+ *    \  1  1  1  4 /
+ */
+    const IndexType rows = 4;
+    const IndexType cols = 4;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    m.setElement( 0, 0, 4.0 );        // 0th row
+    m.setElement( 0, 1, 1.0 );
+    m.setElement( 0, 2, 1.0 );
+    m.setElement( 0, 3, 1.0 );
+
+    m.setElement( 1, 0, 1.0 );        // 1st row
+    m.setElement( 1, 1, 4.0 );
+    m.setElement( 1, 2, 1.0 );
+    m.setElement( 1, 3, 1.0 );
+
+    m.setElement( 2, 0, 1.0 );
+    m.setElement( 2, 1, 1.0 );        // 2nd row
+    m.setElement( 2, 2, 4.0 );
+    m.setElement( 2, 3, 1.0 );
+
+    m.setElement( 3, 0, 1.0 );        // 3rd row
+    m.setElement( 3, 1, 1.0 );
+    m.setElement( 3, 2, 1.0 );
+    m.setElement( 3, 3, 4.0 );
+
+    RealType bVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 };
+    RealType xVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 };
+
+    IndexType row = 0;
+    RealType omega = 1;
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], -0.5 );
+    EXPECT_EQ( xVector[ 1 ],  1.0 );
+    EXPECT_EQ( xVector[ 2 ],  1.0 );
+    EXPECT_EQ( xVector[ 3 ],  1.0 );
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], -0.5 );
+    EXPECT_EQ( xVector[ 1 ], -0.125 );
+    EXPECT_EQ( xVector[ 2 ],  1.0 );
+    EXPECT_EQ( xVector[ 3 ],  1.0 );
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], -0.5 );
+    EXPECT_EQ( xVector[ 1 ], -0.125 );
+    EXPECT_EQ( xVector[ 2 ],  0.15625 );
+    EXPECT_EQ( xVector[ 3 ],  1.0 );
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], -0.5 );
+    EXPECT_EQ( xVector[ 1 ], -0.125 );
+    EXPECT_EQ( xVector[ 2 ], 0.15625 );
+    EXPECT_EQ( xVector[ 3 ], 0.3671875 );
+}
+
+template< typename Matrix >
+void test_AssignmentOperator()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >;
+   using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   DenseHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j <= i; j++ )
+         hostMatrix( i, j ) = i + j;
+
+   Matrix matrix( rows, columns );
+   matrix.getValues() = 0.0;
+   matrix = hostMatrix;
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+
+#ifdef HAVE_CUDA
+   DenseCuda cudaMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j <= i; j++ )
+         cudaMatrix.setElement( i, j, i + j );
+
+   matrix.getValues() = 0.0;
+   matrix = cudaMatrix;
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+#endif
+}
+
+
+template< typename Matrix >
+void test_SaveAndLoad()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 4x4 dense matrix:
+ *
+ *    /  1  2  3  4 \
+ *    |  5  6  7  8 |
+ *    |  9 10 11 12 |
+ *    \ 13 14 15 16 /
+ */
+    const IndexType rows = 4;
+    const IndexType cols = 4;
+
+    Matrix savedMatrix;
+    savedMatrix.reset();
+    savedMatrix.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++ )
+            savedMatrix.setElement( i, j, value++ );
+
+    ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) );
+
+    Matrix loadedMatrix;
+    loadedMatrix.reset();
+    loadedMatrix.setDimensions( rows, cols );
+
+    ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) );
+
+    EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
+    EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
+    EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
+    EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
+
+    EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
+    EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
+    EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
+    EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
+
+    EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
+    EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
+    EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
+    EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
+
+    EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
+    EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
+    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
+    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
+
+    EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
+    EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
+    EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
+    EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  4 );
+
+    EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  5 );
+    EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  6 );
+    EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  7 );
+    EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  8 );
+
+    EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  9 );
+    EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 );
+    EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 );
+    EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 );
+
+    EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 13 );
+    EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 14 );
+    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 );
+    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 );
+}
+
+template< typename Matrix >
+void test_Print()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 5x4 sparse matrix:
+ *
+ *    /  1  2  3  4 \
+ *    |  5  6  7  8 |
+ *    |  9 10 11 12 |
+ *    | 13 14 15 16 |
+ *    \ 17 18 19 20 /
+ */
+    const IndexType rows = 5;
+    const IndexType cols = 4;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++)
+        for( IndexType j = 0; j < cols; j++)
+            m.setElement( i, j, value++ );
+
+    #include <sstream>
+    std::stringstream printed;
+    std::stringstream couted;
+
+    //change the underlying buffer and save the old buffer
+    auto old_buf = std::cout.rdbuf(printed.rdbuf());
+
+    m.print( std::cout ); //all the std::cout goes to ss
+
+    std::cout.rdbuf(old_buf); //reset
+
+    couted << "Row: 0 ->  Col:0->1	 Col:1->2	 Col:2->3	 Col:3->4\t\n"
+              "Row: 1 ->  Col:0->5	 Col:1->6	 Col:2->7	 Col:3->8\t\n"
+              "Row: 2 ->  Col:0->9	 Col:1->10	 Col:2->11	 Col:3->12\t\n"
+              "Row: 3 ->  Col:0->13	 Col:1->14	 Col:2->15	 Col:3->16\t\n"
+              "Row: 4 ->  Col:0->17	 Col:1->18	 Col:2->19	 Col:3->20\t\n";
+
+    EXPECT_EQ( printed.str(), couted.str() );
+}
+
+// test fixture for typed tests
+template< typename Matrix >
+class MatrixTest : public ::testing::Test
+{
+protected:
+   using MatrixType = Matrix;
+};
+
+// types for which MatrixTest is instantiated
+using MatrixTypes = ::testing::Types
+<
+    TNL::Matrices::Dense< int,    TNL::Devices::Host, short >,
+    TNL::Matrices::Dense< long,   TNL::Devices::Host, short >,
+    TNL::Matrices::Dense< float,  TNL::Devices::Host, short >,
+    TNL::Matrices::Dense< double, TNL::Devices::Host, short >,
+    TNL::Matrices::Dense< int,    TNL::Devices::Host, int >,
+    TNL::Matrices::Dense< long,   TNL::Devices::Host, int >,
+    TNL::Matrices::Dense< float,  TNL::Devices::Host, int >,
+    TNL::Matrices::Dense< double, TNL::Devices::Host, int >,
+    TNL::Matrices::Dense< int,    TNL::Devices::Host, long >,
+    TNL::Matrices::Dense< long,   TNL::Devices::Host, long >,
+    TNL::Matrices::Dense< float,  TNL::Devices::Host, long >,
+    TNL::Matrices::Dense< double, TNL::Devices::Host, long >
+#ifdef HAVE_CUDA
+    ,TNL::Matrices::Dense< int,    TNL::Devices::Cuda, short >,
+    TNL::Matrices::Dense< long,   TNL::Devices::Cuda, short >,
+    TNL::Matrices::Dense< float,  TNL::Devices::Cuda, short >,
+    TNL::Matrices::Dense< double, TNL::Devices::Cuda, short >,
+    TNL::Matrices::Dense< int,    TNL::Devices::Cuda, int >,
+    TNL::Matrices::Dense< long,   TNL::Devices::Cuda, int >,
+    TNL::Matrices::Dense< float,  TNL::Devices::Cuda, int >,
+    TNL::Matrices::Dense< double, TNL::Devices::Cuda, int >,
+    TNL::Matrices::Dense< int,    TNL::Devices::Cuda, long >,
+    TNL::Matrices::Dense< long,   TNL::Devices::Cuda, long >,
+    TNL::Matrices::Dense< float,  TNL::Devices::Cuda, long >,
+    TNL::Matrices::Dense< double, TNL::Devices::Cuda, long >
+#endif
+>;
+
+TYPED_TEST_SUITE( MatrixTest, MatrixTypes );
+
+TYPED_TEST( MatrixTest, setDimensionsTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetDimensions< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setLikeTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetLike< MatrixType, MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, getRowLengthTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetRowLength< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, getNumberOfMatrixElementsTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetNumberOfMatrixElements< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetNumberOfNonzeroMatrixElements< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, resetTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_Reset< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setValueTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetValue< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setElementTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetElement< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, addElementTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AddElement< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setRowTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetRow< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, addRowTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AddRow< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, vectorProductTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_VectorProduct< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, addMatrixTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AddMatrix< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, assignmentOperatorTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AssignmentOperator< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, saveAndLoadTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SaveAndLoad< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, printTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_Print< MatrixType >();
+}
+
+//// test_getType is not general enough yet. DO NOT TEST IT YET.
+
+//TEST( DenseMatrixTest, Dense_GetTypeTest_Host )
+//{
+//    host_test_GetType< Dense_host_float, Dense_host_int >();
+//}
+//
+//#ifdef HAVE_CUDA
+//TEST( DenseMatrixTest, Dense_GetTypeTest_Cuda )
+//{
+//    cuda_test_GetType< Dense_cuda_float, Dense_cuda_int >();
+//}
+//#endif
+
+/*TEST( DenseMatrixTest, Dense_getMatrixProductTest_Host )
+{
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched on CPU, this test will not build, but will print the following message: \n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h(609): error: no instance of function template \"TNL::Matrices::DenseMatrixProductKernel\" matches the argument list\n";
+    std::cout << "              argument types are: (TNL::Matrices::Dense<int, TNL::Devices::Host, int> *, Dense_host_int *, Dense_host_int *, const int, const int, int, int)\n";
+    std::cout << "          detected during:\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Dense<Real, Device, Index>::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Dense<Real, Device, Index>::RealType &, const TNL::Matrices::Dense<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix1=Dense_host_int, Matrix2=Dense_host_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(901): here\n";
+    std::cout << "                  instantiation of \"void test_GetMatrixProduct<Matrix>() [with Matrix=Dense_host_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(1315): here\n\n";
+}
+
+#ifdef HAVE_CUDA
+TEST( DenseMatrixTest, Dense_getMatrixProductTest_Cuda )
+{
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched on GPU, this test will not build, but will print the following message: \n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h(510): error: identifier \"tnlCudaMin\" is undefined\n";
+    std::cout << "          detected during:\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::DenseMatrixProductKernel<Real,Index,Matrix1,Matrix2,tileDim,tileRowBlockSize>(TNL::Matrices::Dense<Real, TNL::Devices::Cuda, Index> *, const Matrix1 *, const Matrix2 *, Real, Real, Index, Index) [with Real=int, Index=int, Matrix1=Dense_cuda_int, Matrix2=Dense_cuda_int, tileDim=32, tileRowBlockSize=8]\"\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Dense<Real, Device, Index>::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Dense<Real, Device, Index>::RealType &, const TNL::Matrices::Dense<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Cuda, Index=int, Matrix1=Dense_cuda_int, Matrix2=Dense_cuda_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(901): here\n";
+    std::cout << "                  instantiation of \"void test_GetMatrixProduct<Matrix>() [with Matrix=Dense_cuda_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(1332): here\n\n";
+}
+#endif
+
+TEST( DenseMatrixTest, Dense_getTranspositionTest_Host )
+{
+//    test_GetTransposition< Dense_host_int >();
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched on CPU, this test will not build, but will print the following message: \n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h(836): error: no instance of function template \"TNL::Matrices::DenseTranspositionAlignedKernel\" matches the argument list\n";
+    std::cout << "              argument types are: (TNL::Matrices::Dense<int, TNL::Devices::Host, int> *, Dense_host_int *, const int, int, int)\n";
+    std::cout << "          detected during:\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Dense<Real, Device, Index>::getTransposition(const Matrix &, const TNL::Matrices::Dense<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Dense_host_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(977): here\n";
+    std::cout << "                  instantiation of \"void test_GetTransposition<Matrix>() [with Matrix=Dense_host_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(1420): here\n\n";
+    std::cout << "AND this message: \n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h(852): error: no instance of function template \"TNL::Matrices::DenseTranspositionNonAlignedKernel\" matches the argument list\n";
+    std::cout << "              argument types are: (TNL::Matrices::Dense<int, TNL::Devices::Host, int> *, Dense_host_int *, const int, int, int)\n";
+    std::cout << "          detected during:\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Dense<Real, Device, Index>::getTransposition(const Matrix &, const TNL::Matrices::Dense<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Dense_host_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(977): here\n";
+    std::cout << "                  instantiation of \"void test_GetTransposition<Matrix>() [with Matrix=Dense_host_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(1420): here\n\n";
+}
+
+#ifdef HAVE_CUDA
+TEST( DenseMatrixTest, Dense_getTranspositionTest_Cuda )
+{
+//    test_GetTransposition< Dense_cuda_int >();
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched on GPU, this test throws the following message: \n";
+    std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!!\n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h \n";
+    std::cout << "      Line: 329 \n";
+    std::cout << "      Diagnostics: Not supported with CUDA.\n";
+    std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h \n";
+    std::cout << "      Line: 329 \n";
+    std::cout << "      Diagnostics: Not supported with CUDA.\n";
+    std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h \n";
+    std::cout << "      Line: 329 \n";
+    std::cout << "      Diagnostics: Not supported with CUDA.\n";
+    std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h \n";
+    std::cout << "      Line: 329 \n";
+    std::cout << "      Diagnostics: Not supported with CUDA.\n";
+    std::cout << "  terminate called after throwing an instance of 'TNL::Exceptions::CudaRuntimeError'\n";
+    std::cout << "          what():  CUDA ERROR 4 (cudaErrorLaunchFailure): unspecified launch failure.\n";
+    std::cout << "  Source: line 57 in /home/lukas/tnl-dev/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h: unspecified launch failure\n";
+    std::cout << "  [1]    4003 abort (core dumped)  ./DenseMatrixTest-dbg\n";
+}
+#endif
+
+TEST( DenseMatrixTest, Dense_performSORIterationTest_Host )
+{
+    test_PerformSORIteration< Dense_host_float >();
+}
+
+#ifdef HAVE_CUDA
+TEST( DenseMatrixTest, Dense_performSORIterationTest_Cuda )
+{
+//    test_PerformSORIteration< Dense_cuda_float >();
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched, this test throws the following message: \n";
+    std::cout << "      [1]    6992 segmentation fault (core dumped)  ./SparseMatrixTest-dbg\n\n";
+    std::cout << "\n THIS IS NOT IMPLEMENTED FOR CUDA YET!!\n\n";
+}
+#endif
+ * */
+
+#endif // HAVE_GTEST
+
+#include "../main.h"
-- 
GitLab


From e12f893516ec2bc3b08447516306c8a1da7c65cc Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 8 Jan 2020 18:27:21 +0100
Subject: [PATCH 075/179] Added tridiagonal matrix unit tests.

---
 .../Matrices/TridiagonalMatrixTest.cpp        |   8 +-
 .../Matrices/TridiagonalMatrixTest.cu         |   8 +-
 .../Matrices/TridiagonalMatrixTest.h          | 191 +++++++++---------
 3 files changed, 102 insertions(+), 105 deletions(-)

diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp b/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp
index a56349360..3248d3526 100644
--- a/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp
+++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp
@@ -1,11 +1,11 @@
 /***************************************************************************
-                          DenseMatrixTest.cpp -  description
+                          TridiagonalMatrixTest.cpp -  description
                              -------------------
-    begin                : Nov 10, 2018
-    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    begin                : Jan 8, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include "DenseMatrixTest.h"
\ No newline at end of file
+#include "TridiagonalMatrixTest.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.cu b/src/UnitTests/Matrices/TridiagonalMatrixTest.cu
index 11d45efdb..16f909fa7 100644
--- a/src/UnitTests/Matrices/TridiagonalMatrixTest.cu
+++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.cu
@@ -1,11 +1,11 @@
 /***************************************************************************
-                          DenseMatrixTest.cu -  description
+                          TridiagonalMatrixTest.cu -  description
                              -------------------
-    begin                : Nov 10, 2018
-    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    begin                : Jan 8, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include "DenseMatrixTest.h"
\ No newline at end of file
+#include "TridiagonalMatrixTest.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
index 8c3132caf..40cecb2bd 100644
--- a/src/UnitTests/Matrices/TridiagonalMatrixTest.h
+++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
@@ -1,8 +1,8 @@
 /***************************************************************************
-                          DenseMatrixTest.h -  description
+                          TridiagonalMatrixTest.h -  description
                              -------------------
-    begin                : Nov 10, 2018
-    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    begin                : Jan 8, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
@@ -10,7 +10,7 @@
 
 #include <TNL/Devices/Host.h>
 #include <TNL/Matrices/Matrix.h>
-#include <TNL/Matrices/Dense.h>
+#include <TNL/Matrices/Tridiagonal.h>
 #include <TNL/Containers/Array.h>
 
 #include <TNL/Containers/Vector.h>
@@ -18,37 +18,29 @@
 #include <TNL/Math.h>
 #include <iostream>
 
-using Dense_host_float = TNL::Matrices::Dense< float, TNL::Devices::Host, int >;
-using Dense_host_int = TNL::Matrices::Dense< int, TNL::Devices::Host, int >;
+using Tridiagonal_host_float = TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int >;
+using Tridiagonal_host_int = TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int >;
 
-using Dense_cuda_float = TNL::Matrices::Dense< float, TNL::Devices::Cuda, int >;
-using Dense_cuda_int = TNL::Matrices::Dense< int, TNL::Devices::Cuda, int >;
+using Tridiagonal_cuda_float = TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int >;
+using Tridiagonal_cuda_int = TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, int >;
 
-static const char* TEST_FILE_NAME = "test_DenseMatrixTest.tnl";
+static const char* TEST_FILE_NAME = "test_TridiagonalMatrixTest.tnl";
 
 #ifdef HAVE_GTEST
 #include <type_traits>
 
 #include <gtest/gtest.h>
 
-template< typename MatrixHostFloat, typename MatrixHostInt >
-void host_test_GetType()
+void test_GetSerializationType()
 {
-    MatrixHostFloat mtrxHostFloat;
-    MatrixHostInt mtrxHostInt;
-
-    EXPECT_EQ( mtrxHostFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Host, int >" ) );
-    EXPECT_EQ( mtrxHostInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Host, int >" ) );
-}
-
-template< typename MatrixCudaFloat, typename MatrixCudaInt >
-void cuda_test_GetType()
-{
-    MatrixCudaFloat mtrxCudaFloat;
-    MatrixCudaInt mtrxCudaInt;
-
-    EXPECT_EQ( mtrxCudaFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Cuda, int >" ) );
-    EXPECT_EQ( mtrxCudaInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Cuda, int >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int,   TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int,   TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int,   TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int,   TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, false, [any_allocator] >" ) );
 }
 
 template< typename Matrix >
@@ -1173,11 +1165,11 @@ void test_AssignmentOperator()
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
 
-   using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >;
-   using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >;
+   using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >;
+   using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >;
 
    const IndexType rows( 10 ), columns( 10 );
-   DenseHost hostMatrix( rows, columns );
+   TridiagonalHost hostMatrix( rows, columns );
    for( IndexType i = 0; i < columns; i++ )
       for( IndexType j = 0; j <= i; j++ )
          hostMatrix( i, j ) = i + j;
@@ -1195,7 +1187,7 @@ void test_AssignmentOperator()
       }
 
 #ifdef HAVE_CUDA
-   DenseCuda cudaMatrix( rows, columns );
+   TridiagonalCuda cudaMatrix( rows, columns );
    for( IndexType i = 0; i < columns; i++ )
       for( IndexType j = 0; j <= i; j++ )
          cudaMatrix.setElement( i, j, i + j );
@@ -1347,36 +1339,41 @@ protected:
 // types for which MatrixTest is instantiated
 using MatrixTypes = ::testing::Types
 <
-    TNL::Matrices::Dense< int,    TNL::Devices::Host, short >,
-    TNL::Matrices::Dense< long,   TNL::Devices::Host, short >,
-    TNL::Matrices::Dense< float,  TNL::Devices::Host, short >,
-    TNL::Matrices::Dense< double, TNL::Devices::Host, short >,
-    TNL::Matrices::Dense< int,    TNL::Devices::Host, int >,
-    TNL::Matrices::Dense< long,   TNL::Devices::Host, int >,
-    TNL::Matrices::Dense< float,  TNL::Devices::Host, int >,
-    TNL::Matrices::Dense< double, TNL::Devices::Host, int >,
-    TNL::Matrices::Dense< int,    TNL::Devices::Host, long >,
-    TNL::Matrices::Dense< long,   TNL::Devices::Host, long >,
-    TNL::Matrices::Dense< float,  TNL::Devices::Host, long >,
-    TNL::Matrices::Dense< double, TNL::Devices::Host, long >
+    TNL::Matrices::Tridiagonal< int,    TNL::Devices::Host, short >,
+    TNL::Matrices::Tridiagonal< long,   TNL::Devices::Host, short >,
+    TNL::Matrices::Tridiagonal< float,  TNL::Devices::Host, short >,
+    TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, short >,
+    TNL::Matrices::Tridiagonal< int,    TNL::Devices::Host, int >,
+    TNL::Matrices::Tridiagonal< long,   TNL::Devices::Host, int >,
+    TNL::Matrices::Tridiagonal< float,  TNL::Devices::Host, int >,
+    TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, int >,
+    TNL::Matrices::Tridiagonal< int,    TNL::Devices::Host, long >,
+    TNL::Matrices::Tridiagonal< long,   TNL::Devices::Host, long >,
+    TNL::Matrices::Tridiagonal< float,  TNL::Devices::Host, long >,
+    TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, long >
 #ifdef HAVE_CUDA
-    ,TNL::Matrices::Dense< int,    TNL::Devices::Cuda, short >,
-    TNL::Matrices::Dense< long,   TNL::Devices::Cuda, short >,
-    TNL::Matrices::Dense< float,  TNL::Devices::Cuda, short >,
-    TNL::Matrices::Dense< double, TNL::Devices::Cuda, short >,
-    TNL::Matrices::Dense< int,    TNL::Devices::Cuda, int >,
-    TNL::Matrices::Dense< long,   TNL::Devices::Cuda, int >,
-    TNL::Matrices::Dense< float,  TNL::Devices::Cuda, int >,
-    TNL::Matrices::Dense< double, TNL::Devices::Cuda, int >,
-    TNL::Matrices::Dense< int,    TNL::Devices::Cuda, long >,
-    TNL::Matrices::Dense< long,   TNL::Devices::Cuda, long >,
-    TNL::Matrices::Dense< float,  TNL::Devices::Cuda, long >,
-    TNL::Matrices::Dense< double, TNL::Devices::Cuda, long >
+    ,TNL::Matrices::Tridiagonal< int,    TNL::Devices::Cuda, short >,
+    TNL::Matrices::Tridiagonal< long,   TNL::Devices::Cuda, short >,
+    TNL::Matrices::Tridiagonal< float,  TNL::Devices::Cuda, short >,
+    TNL::Matrices::Tridiagonal< double, TNL::Devices::Cuda, short >,
+    TNL::Matrices::Tridiagonal< int,    TNL::Devices::Cuda, int >,
+    TNL::Matrices::Tridiagonal< long,   TNL::Devices::Cuda, int >,
+    TNL::Matrices::Tridiagonal< float,  TNL::Devices::Cuda, int >,
+    TNL::Matrices::Tridiagonal< double, TNL::Devices::Cuda, int >,
+    TNL::Matrices::Tridiagonal< int,    TNL::Devices::Cuda, long >,
+    TNL::Matrices::Tridiagonal< long,   TNL::Devices::Cuda, long >,
+    TNL::Matrices::Tridiagonal< float,  TNL::Devices::Cuda, long >,
+    TNL::Matrices::Tridiagonal< double, TNL::Devices::Cuda, long >
 #endif
 >;
 
 TYPED_TEST_SUITE( MatrixTest, MatrixTypes );
 
+TYPED_TEST( Matrix, getSerializationType )
+{
+   test_GetSerializationType();
+}
+
 TYPED_TEST( MatrixTest, setDimensionsTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
@@ -1491,114 +1488,114 @@ TYPED_TEST( MatrixTest, printTest )
 
 //// test_getType is not general enough yet. DO NOT TEST IT YET.
 
-//TEST( DenseMatrixTest, Dense_GetTypeTest_Host )
+//TEST( TridiagonalMatrixTest, Tridiagonal_GetTypeTest_Host )
 //{
-//    host_test_GetType< Dense_host_float, Dense_host_int >();
+//    host_test_GetType< Tridiagonal_host_float, Tridiagonal_host_int >();
 //}
 //
 //#ifdef HAVE_CUDA
-//TEST( DenseMatrixTest, Dense_GetTypeTest_Cuda )
+//TEST( TridiagonalMatrixTest, Tridiagonal_GetTypeTest_Cuda )
 //{
-//    cuda_test_GetType< Dense_cuda_float, Dense_cuda_int >();
+//    cuda_test_GetType< Tridiagonal_cuda_float, Tridiagonal_cuda_int >();
 //}
 //#endif
 
-/*TEST( DenseMatrixTest, Dense_getMatrixProductTest_Host )
+/*TEST( TridiagonalMatrixTest, Tridiagonal_getMatrixProductTest_Host )
 {
     bool testRan = false;
     EXPECT_TRUE( testRan );
     std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
     std::cout << "If launched on CPU, this test will not build, but will print the following message: \n";
-    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h(609): error: no instance of function template \"TNL::Matrices::DenseMatrixProductKernel\" matches the argument list\n";
-    std::cout << "              argument types are: (TNL::Matrices::Dense<int, TNL::Devices::Host, int> *, Dense_host_int *, Dense_host_int *, const int, const int, int, int)\n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h(609): error: no instance of function template \"TNL::Matrices::TridiagonalMatrixProductKernel\" matches the argument list\n";
+    std::cout << "              argument types are: (TNL::Matrices::Tridiagonal<int, TNL::Devices::Host, int> *, Tridiagonal_host_int *, Tridiagonal_host_int *, const int, const int, int, int)\n";
     std::cout << "          detected during:\n";
-    std::cout << "              instantiation of \"void TNL::Matrices::Dense<Real, Device, Index>::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Dense<Real, Device, Index>::RealType &, const TNL::Matrices::Dense<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix1=Dense_host_int, Matrix2=Dense_host_int, tileDim=32]\"\n";
-    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(901): here\n";
-    std::cout << "                  instantiation of \"void test_GetMatrixProduct<Matrix>() [with Matrix=Dense_host_int]\"\n";
-    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(1315): here\n\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Tridiagonal<Real, Device, Index>::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Tridiagonal<Real, Device, Index>::RealType &, const TNL::Matrices::Tridiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix1=Tridiagonal_host_int, Matrix2=Tridiagonal_host_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(901): here\n";
+    std::cout << "                  instantiation of \"void test_GetMatrixProduct<Matrix>() [with Matrix=Tridiagonal_host_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(1315): here\n\n";
 }
 
 #ifdef HAVE_CUDA
-TEST( DenseMatrixTest, Dense_getMatrixProductTest_Cuda )
+TEST( TridiagonalMatrixTest, Tridiagonal_getMatrixProductTest_Cuda )
 {
     bool testRan = false;
     EXPECT_TRUE( testRan );
     std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
     std::cout << "If launched on GPU, this test will not build, but will print the following message: \n";
-    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h(510): error: identifier \"tnlCudaMin\" is undefined\n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h(510): error: identifier \"tnlCudaMin\" is undefined\n";
     std::cout << "          detected during:\n";
-    std::cout << "              instantiation of \"void TNL::Matrices::DenseMatrixProductKernel<Real,Index,Matrix1,Matrix2,tileDim,tileRowBlockSize>(TNL::Matrices::Dense<Real, TNL::Devices::Cuda, Index> *, const Matrix1 *, const Matrix2 *, Real, Real, Index, Index) [with Real=int, Index=int, Matrix1=Dense_cuda_int, Matrix2=Dense_cuda_int, tileDim=32, tileRowBlockSize=8]\"\n";
-    std::cout << "              instantiation of \"void TNL::Matrices::Dense<Real, Device, Index>::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Dense<Real, Device, Index>::RealType &, const TNL::Matrices::Dense<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Cuda, Index=int, Matrix1=Dense_cuda_int, Matrix2=Dense_cuda_int, tileDim=32]\"\n";
-    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(901): here\n";
-    std::cout << "                  instantiation of \"void test_GetMatrixProduct<Matrix>() [with Matrix=Dense_cuda_int]\"\n";
-    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(1332): here\n\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::TridiagonalMatrixProductKernel<Real,Index,Matrix1,Matrix2,tileDim,tileRowBlockSize>(TNL::Matrices::Tridiagonal<Real, TNL::Devices::Cuda, Index> *, const Matrix1 *, const Matrix2 *, Real, Real, Index, Index) [with Real=int, Index=int, Matrix1=Tridiagonal_cuda_int, Matrix2=Tridiagonal_cuda_int, tileDim=32, tileRowBlockSize=8]\"\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Tridiagonal<Real, Device, Index>::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Tridiagonal<Real, Device, Index>::RealType &, const TNL::Matrices::Tridiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Cuda, Index=int, Matrix1=Tridiagonal_cuda_int, Matrix2=Tridiagonal_cuda_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(901): here\n";
+    std::cout << "                  instantiation of \"void test_GetMatrixProduct<Matrix>() [with Matrix=Tridiagonal_cuda_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(1332): here\n\n";
 }
 #endif
 
-TEST( DenseMatrixTest, Dense_getTranspositionTest_Host )
+TEST( TridiagonalMatrixTest, Tridiagonal_getTranspositionTest_Host )
 {
-//    test_GetTransposition< Dense_host_int >();
+//    test_GetTransposition< Tridiagonal_host_int >();
     bool testRan = false;
     EXPECT_TRUE( testRan );
     std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
     std::cout << "If launched on CPU, this test will not build, but will print the following message: \n";
-    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h(836): error: no instance of function template \"TNL::Matrices::DenseTranspositionAlignedKernel\" matches the argument list\n";
-    std::cout << "              argument types are: (TNL::Matrices::Dense<int, TNL::Devices::Host, int> *, Dense_host_int *, const int, int, int)\n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h(836): error: no instance of function template \"TNL::Matrices::TridiagonalTranspositionAlignedKernel\" matches the argument list\n";
+    std::cout << "              argument types are: (TNL::Matrices::Tridiagonal<int, TNL::Devices::Host, int> *, Tridiagonal_host_int *, const int, int, int)\n";
     std::cout << "          detected during:\n";
-    std::cout << "              instantiation of \"void TNL::Matrices::Dense<Real, Device, Index>::getTransposition(const Matrix &, const TNL::Matrices::Dense<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Dense_host_int, tileDim=32]\"\n";
-    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(977): here\n";
-    std::cout << "                  instantiation of \"void test_GetTransposition<Matrix>() [with Matrix=Dense_host_int]\"\n";
-    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(1420): here\n\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Tridiagonal<Real, Device, Index>::getTransposition(const Matrix &, const TNL::Matrices::Tridiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Tridiagonal_host_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(977): here\n";
+    std::cout << "                  instantiation of \"void test_GetTransposition<Matrix>() [with Matrix=Tridiagonal_host_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(1420): here\n\n";
     std::cout << "AND this message: \n";
-    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h(852): error: no instance of function template \"TNL::Matrices::DenseTranspositionNonAlignedKernel\" matches the argument list\n";
-    std::cout << "              argument types are: (TNL::Matrices::Dense<int, TNL::Devices::Host, int> *, Dense_host_int *, const int, int, int)\n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h(852): error: no instance of function template \"TNL::Matrices::TridiagonalTranspositionNonAlignedKernel\" matches the argument list\n";
+    std::cout << "              argument types are: (TNL::Matrices::Tridiagonal<int, TNL::Devices::Host, int> *, Tridiagonal_host_int *, const int, int, int)\n";
     std::cout << "          detected during:\n";
-    std::cout << "              instantiation of \"void TNL::Matrices::Dense<Real, Device, Index>::getTransposition(const Matrix &, const TNL::Matrices::Dense<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Dense_host_int, tileDim=32]\"\n";
-    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(977): here\n";
-    std::cout << "                  instantiation of \"void test_GetTransposition<Matrix>() [with Matrix=Dense_host_int]\"\n";
-    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(1420): here\n\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Tridiagonal<Real, Device, Index>::getTransposition(const Matrix &, const TNL::Matrices::Tridiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Tridiagonal_host_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(977): here\n";
+    std::cout << "                  instantiation of \"void test_GetTransposition<Matrix>() [with Matrix=Tridiagonal_host_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(1420): here\n\n";
 }
 
 #ifdef HAVE_CUDA
-TEST( DenseMatrixTest, Dense_getTranspositionTest_Cuda )
+TEST( TridiagonalMatrixTest, Tridiagonal_getTranspositionTest_Cuda )
 {
-//    test_GetTransposition< Dense_cuda_int >();
+//    test_GetTransposition< Tridiagonal_cuda_int >();
     bool testRan = false;
     EXPECT_TRUE( testRan );
     std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
     std::cout << "If launched on GPU, this test throws the following message: \n";
     std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!!\n";
-    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h \n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h \n";
     std::cout << "      Line: 329 \n";
     std::cout << "      Diagnostics: Not supported with CUDA.\n";
     std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n";
-    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h \n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h \n";
     std::cout << "      Line: 329 \n";
     std::cout << "      Diagnostics: Not supported with CUDA.\n";
     std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n";
-    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h \n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h \n";
     std::cout << "      Line: 329 \n";
     std::cout << "      Diagnostics: Not supported with CUDA.\n";
     std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n";
-    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h \n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h \n";
     std::cout << "      Line: 329 \n";
     std::cout << "      Diagnostics: Not supported with CUDA.\n";
     std::cout << "  terminate called after throwing an instance of 'TNL::Exceptions::CudaRuntimeError'\n";
     std::cout << "          what():  CUDA ERROR 4 (cudaErrorLaunchFailure): unspecified launch failure.\n";
     std::cout << "  Source: line 57 in /home/lukas/tnl-dev/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h: unspecified launch failure\n";
-    std::cout << "  [1]    4003 abort (core dumped)  ./DenseMatrixTest-dbg\n";
+    std::cout << "  [1]    4003 abort (core dumped)  ./TridiagonalMatrixTest-dbg\n";
 }
 #endif
 
-TEST( DenseMatrixTest, Dense_performSORIterationTest_Host )
+TEST( TridiagonalMatrixTest, Tridiagonal_performSORIterationTest_Host )
 {
-    test_PerformSORIteration< Dense_host_float >();
+    test_PerformSORIteration< Tridiagonal_host_float >();
 }
 
 #ifdef HAVE_CUDA
-TEST( DenseMatrixTest, Dense_performSORIterationTest_Cuda )
+TEST( TridiagonalMatrixTest, Tridiagonal_performSORIterationTest_Cuda )
 {
-//    test_PerformSORIteration< Dense_cuda_float >();
+//    test_PerformSORIteration< Tridiagonal_cuda_float >();
     bool testRan = false;
     EXPECT_TRUE( testRan );
     std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
-- 
GitLab


From 72972cfed69a49860bcd71d94fbe34a1dcefde33 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 8 Jan 2020 18:27:50 +0100
Subject: [PATCH 076/179] Added TridiagonalMatrixView.

---
 src/TNL/Matrices/TridiagonalMatrixView.h   | 209 ++++++
 src/TNL/Matrices/TridiagonalMatrixView.hpp | 759 +++++++++++++++++++++
 2 files changed, 968 insertions(+)
 create mode 100644 src/TNL/Matrices/TridiagonalMatrixView.h
 create mode 100644 src/TNL/Matrices/TridiagonalMatrixView.hpp

diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h
new file mode 100644
index 000000000..3f57fe1c3
--- /dev/null
+++ b/src/TNL/Matrices/TridiagonalMatrixView.h
@@ -0,0 +1,209 @@
+/***************************************************************************
+                          Tridiagonal.h  -  description
+                             -------------------
+    begin                : Nov 30, 2013
+    copyright            : (C) 2013 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/Matrix.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Matrices/TridiagonalRow.h>
+
+namespace TNL {
+namespace Matrices {   
+
+template< typename Device >
+class TridiagonalDeviceDependentCode;
+
+template< typename Real = double,
+          typename Device = Devices::Host,
+          typename Index = int >
+class Tridiagonal : public Matrix< Real, Device, Index >
+{
+private:
+   // convenient template alias for controlling the selection of copy-assignment operator
+   template< typename Device2 >
+   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
+
+   // friend class will be needed for templated assignment operators
+   template< typename Real2, typename Device2, typename Index2 >
+   friend class Tridiagonal;
+
+public:
+   typedef Real RealType;
+   typedef Device DeviceType;
+   typedef Index IndexType;
+   typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector;
+   typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
+   typedef Matrix< Real, Device, Index > BaseType;
+   typedef TridiagonalRow< Real, Index > MatrixRow;
+
+   template< typename _Real = Real,
+             typename _Device = Device,
+             typename _Index = Index >
+   using Self = Tridiagonal< _Real, _Device, _Index >;
+
+   Tridiagonal();
+
+   static String getSerializationType();
+
+   virtual String getSerializationTypeVirtual() const;
+
+   void setDimensions( const IndexType rows,
+                       const IndexType columns );
+
+   void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
+
+   IndexType getRowLength( const IndexType row ) const;
+
+   __cuda_callable__
+   IndexType getRowLengthFast( const IndexType row ) const;
+
+   IndexType getMaxRowLength() const;
+
+   template< typename Real2, typename Device2, typename Index2 >
+   void setLike( const Tridiagonal< Real2, Device2, Index2 >& m );
+
+   IndexType getNumberOfMatrixElements() const;
+
+   IndexType getNumberOfNonzeroMatrixElements() const;
+
+   IndexType getMaxRowlength() const;
+
+   void reset();
+
+   template< typename Real2, typename Device2, typename Index2 >
+   bool operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const;
+
+   template< typename Real2, typename Device2, typename Index2 >
+   bool operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const;
+
+   void setValue( const RealType& v );
+
+   __cuda_callable__
+   bool setElementFast( const IndexType row,
+                        const IndexType column,
+                        const RealType& value );
+
+   bool setElement( const IndexType row,
+                    const IndexType column,
+                    const RealType& value );
+
+   __cuda_callable__
+   bool addElementFast( const IndexType row,
+                        const IndexType column,
+                        const RealType& value,
+                        const RealType& thisElementMultiplicator = 1.0 );
+
+   bool addElement( const IndexType row,
+                    const IndexType column,
+                    const RealType& value,
+                    const RealType& thisElementMultiplicator = 1.0 );
+
+   __cuda_callable__
+   bool setRowFast( const IndexType row,
+                    const IndexType* columns,
+                    const RealType* values,
+                    const IndexType elements );
+
+   bool setRow( const IndexType row,
+                const IndexType* columns,
+                const RealType* values,
+                const IndexType elements );
+
+   __cuda_callable__
+   bool addRowFast( const IndexType row,
+                    const IndexType* columns,
+                    const RealType* values,
+                    const IndexType elements,
+                    const RealType& thisRowMultiplicator = 1.0 );
+
+   bool addRow( const IndexType row,
+                const IndexType* columns,
+                const RealType* values,
+                const IndexType elements,
+                const RealType& thisRowMultiplicator = 1.0 );
+
+   __cuda_callable__
+   RealType getElementFast( const IndexType row,
+                            const IndexType column ) const;
+
+   RealType getElement( const IndexType row,
+                        const IndexType column ) const;
+
+   __cuda_callable__
+   void getRowFast( const IndexType row,
+                    IndexType* columns,
+                    RealType* values ) const;
+
+   __cuda_callable__
+   MatrixRow getRow( const IndexType rowIndex );
+
+   __cuda_callable__
+   const MatrixRow getRow( const IndexType rowIndex ) const;
+
+   template< typename Vector >
+   __cuda_callable__
+   typename Vector::RealType rowVectorProduct( const IndexType row,
+                                               const Vector& vector ) const;
+
+   template< typename InVector,
+             typename OutVector >
+   void vectorProduct( const InVector& inVector,
+                       OutVector& outVector ) const;
+
+   template< typename Real2, typename Index2 >
+   void addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix,
+                   const RealType& matrixMultiplicator = 1.0,
+                   const RealType& thisMatrixMultiplicator = 1.0 );
+
+   template< typename Real2, typename Index2 >
+   void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
+                          const RealType& matrixMultiplicator = 1.0 );
+
+   template< typename Vector1, typename Vector2 >
+   __cuda_callable__
+   void performSORIteration( const Vector1& b,
+                             const IndexType row,
+                             Vector2& x,
+                             const RealType& omega = 1.0 ) const;
+
+   // copy assignment
+   Tridiagonal& operator=( const Tridiagonal& matrix );
+
+   // cross-device copy assignment
+   template< typename Real2, typename Device2, typename Index2,
+             typename = typename Enabler< Device2 >::type >
+   Tridiagonal& operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix );
+
+   void save( File& file ) const;
+
+   void load( File& file );
+
+   void save( const String& fileName ) const;
+
+   void load( const String& fileName );
+
+   void print( std::ostream& str ) const;
+
+protected:
+
+   __cuda_callable__
+   IndexType getElementIndex( const IndexType row,
+                              const IndexType column ) const;
+
+   Containers::Vector< RealType, DeviceType, IndexType > values;
+
+   typedef TridiagonalDeviceDependentCode< DeviceType > DeviceDependentCode;
+   friend class TridiagonalDeviceDependentCode< DeviceType >;
+};
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/Tridiagonal_impl.h>
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp
new file mode 100644
index 000000000..2752f6850
--- /dev/null
+++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp
@@ -0,0 +1,759 @@
+/***************************************************************************
+                          Tridiagonal_impl.h  -  description
+                             -------------------
+    begin                : Nov 30, 2013
+    copyright            : (C) 2013 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Assert.h>
+#include <TNL/Matrices/Tridiagonal.h>
+#include <TNL/Exceptions/NotImplementedError.h>
+
+namespace TNL {
+namespace Matrices {   
+
+template< typename Device >
+class TridiagonalDeviceDependentCode;
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Tridiagonal< Real, Device, Index >::Tridiagonal()
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+String Tridiagonal< Real, Device, Index >::getType()
+{
+   return String( "Matrices::Tridiagonal< " ) +
+          String( TNL::getType< Real >() ) +
+          String( ", " ) +
+          String( Device :: getDeviceType() ) +
+          String( ", " ) +
+          String( TNL::getType< Index >() ) +
+          String( " >" );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+String Tridiagonal< Real, Device, Index >::getTypeVirtual() const
+{
+   return this->getType();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+String Tridiagonal< Real, Device, Index >::getSerializationType()
+{
+   return String( "Matrices::Tridiagonal< " ) +
+          getType< RealType >() + ", " +
+          getType< Device >() + ", " +
+          getType< IndexType >() + " >";
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+String Tridiagonal< Real, Device, Index >::getSerializationTypeVirtual() const
+{
+   return this->getSerializationType();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::setDimensions( const IndexType rows,
+                                                        const IndexType columns )
+{
+   Matrix< Real, Device, Index >::setDimensions( rows, columns );
+   values.setSize( 3*min( rows, columns ) );
+   this->values.setValue( 0.0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
+{
+   if( rowLengths[ 0 ] > 2 )
+      throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
+   for( Index i = 1; i < diagonalLength-1; i++ )
+      if( rowLengths[ i ] > 3 )
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   if( this->getRows() > this->getColumns() )
+      if( rowLengths[ this->getRows()-1 ] > 1 )
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   if( this->getRows() == this->getColumns() )
+      if( rowLengths[ this->getRows()-1 ] > 2 )
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   if( this->getRows() < this->getColumns() )
+      if( rowLengths[ this->getRows()-1 ] > 3 )
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Index Tridiagonal< Real, Device, Index >::getRowLength( const IndexType row ) const
+{
+   return this->getRowLengthFast( row );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
+{
+   const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
+   if( row == 0 )
+      return 2;
+   if( row > 0 && row < diagonalLength - 1 )
+      return 3;
+   if( this->getRows() > this->getColumns() )
+      return 1;
+   if( this->getRows() == this->getColumns() )
+      return 2;
+   return 3;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Index Tridiagonal< Real, Device, Index >::getMaxRowLength() const
+{
+   return 3;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Real2, typename Device2, typename Index2 >
+void Tridiagonal< Real, Device, Index >::setLike( const Tridiagonal< Real2, Device2, Index2 >& m )
+{
+   this->setDimensions( m.getRows(), m.getColumns() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Index Tridiagonal< Real, Device, Index >::getNumberOfMatrixElements() const
+{
+   return 3 * min( this->getRows(), this->getColumns() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Index Tridiagonal< Real, Device, Index > :: getNumberOfNonzeroMatrixElements() const
+{
+   IndexType nonzeroElements = 0;
+   for( IndexType i = 0; i < this->values.getSize(); i++ )
+      if( this->values.getElement( i ) != 0 )
+         nonzeroElements++;
+   return nonzeroElements;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Index
+Tridiagonal< Real, Device, Index >::
+getMaxRowlength() const
+{
+   return 3;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::reset()
+{
+   Matrix< Real, Device, Index >::reset();
+   this->values.reset();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Real2, typename Device2, typename Index2 >
+bool Tridiagonal< Real, Device, Index >::operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const
+{
+   return this->values == matrix.values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Real2, typename Device2, typename Index2 >
+bool Tridiagonal< Real, Device, Index >::operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const
+{
+   return this->values != matrix.values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::setValue( const RealType& v )
+{
+   this->values.setValue( v );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+bool Tridiagonal< Real, Device, Index >::setElementFast( const IndexType row,
+                                                                  const IndexType column,
+                                                                  const RealType& value )
+{
+   this->values[ this->getElementIndex( row, column ) ] = value;
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+bool Tridiagonal< Real, Device, Index >::setElement( const IndexType row,
+                                                              const IndexType column,
+                                                              const RealType& value )
+{
+   this->values.setElement( this->getElementIndex( row, column ), value );
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+bool Tridiagonal< Real, Device, Index >::addElementFast( const IndexType row,
+                                                                  const IndexType column,
+                                                                  const RealType& value,
+                                                                  const RealType& thisElementMultiplicator )
+{
+   const Index i = this->getElementIndex( row, column );
+   this->values[ i ] = thisElementMultiplicator*this->values[ i ] + value;
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+bool Tridiagonal< Real, Device, Index >::addElement( const IndexType row,
+                                                              const IndexType column,
+                                                              const RealType& value,
+                                                              const RealType& thisElementMultiplicator )
+{
+   const Index i = this->getElementIndex( row, column );
+   this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value );
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+bool Tridiagonal< Real, Device, Index >::setRowFast( const IndexType row,
+                                                              const IndexType* columns,
+                                                              const RealType* values,
+                                                              const IndexType elements )
+{
+   TNL_ASSERT( elements <= this->columns,
+            std::cerr << " elements = " << elements
+                 << " this->columns = " << this->columns );
+   return this->addRowFast( row, columns, values, elements, 0.0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+bool Tridiagonal< Real, Device, Index >::setRow( const IndexType row,
+                                                          const IndexType* columns,
+                                                          const RealType* values,
+                                                          const IndexType elements )
+{
+   TNL_ASSERT( elements <= this->columns,
+            std::cerr << " elements = " << elements
+                 << " this->columns = " << this->columns );
+   return this->addRow( row, columns, values, elements, 0.0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+bool Tridiagonal< Real, Device, Index >::addRowFast( const IndexType row,
+                                                              const IndexType* columns,
+                                                              const RealType* values,
+                                                              const IndexType elements,
+                                                              const RealType& thisRowMultiplicator )
+{
+   TNL_ASSERT( elements <= this->columns,
+            std::cerr << " elements = " << elements
+                 << " this->columns = " << this->columns );
+   if( elements > 3 )
+      return false;
+   for( IndexType i = 0; i < elements; i++ )
+   {
+      const IndexType& column = columns[ i ];
+      if( column < row - 1 || column > row + 1 )
+         return false;
+      addElementFast( row, column, values[ i ], thisRowMultiplicator );
+   }
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+bool Tridiagonal< Real, Device, Index >::addRow( const IndexType row,
+                                                          const IndexType* columns,
+                                                          const RealType* values,
+                                                          const IndexType elements,
+                                                          const RealType& thisRowMultiplicator )
+{
+   TNL_ASSERT( elements <= this->columns,
+            std::cerr << " elements = " << elements
+                 << " this->columns = " << this->columns );
+   if( elements > 3 )
+      return false;
+   for( IndexType i = 0; i < elements; i++ )
+   {
+      const IndexType column = columns[ i ];
+      if( column < row - 1 || column > row + 1 )
+         return false;
+      addElement( row, column, values[ i ], thisRowMultiplicator );
+   }
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Real Tridiagonal< Real, Device, Index >::getElementFast( const IndexType row,
+                                                                  const IndexType column ) const
+{
+   if( abs( column - row ) > 1 )
+      return 0.0;
+   return this->values[ this->getElementIndex( row, column ) ];
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Real Tridiagonal< Real, Device, Index >::getElement( const IndexType row,
+                                                              const IndexType column ) const
+{
+   if( abs( column - row ) > 1 )
+      return 0.0;
+   return this->values.getElement( this->getElementIndex( row, column ) );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+void Tridiagonal< Real, Device, Index >::getRowFast( const IndexType row,
+                                                              IndexType* columns,
+                                                              RealType* values ) const
+{
+   IndexType elementPointer( 0 );
+   for( IndexType i = -1; i <= 1; i++ )
+   {
+      const IndexType column = row + 1;
+      if( column >= 0 && column < this->getColumns() )
+      {
+         columns[ elementPointer ] = column;
+         values[ elementPointer ] = this->values[ this->getElementIndex( row, column ) ];
+         elementPointer++;
+      }
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+typename Tridiagonal< Real, Device, Index >::MatrixRow
+Tridiagonal< Real, Device, Index >::
+getRow( const IndexType rowIndex )
+{
+   if( std::is_same< Device, Devices::Host >::value )
+      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ],
+                        rowIndex,
+                        this->getColumns(),
+                        1 );
+   if( std::is_same< Device, Devices::Cuda >::value )
+      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ],
+                        rowIndex,
+                        this->getColumns(),
+                        this->rows );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+const typename Tridiagonal< Real, Device, Index >::MatrixRow
+Tridiagonal< Real, Device, Index >::
+getRow( const IndexType rowIndex ) const
+{
+   throw Exceptions::NotImplementedError();
+}
+
+
+template< typename Real,
+          typename Device,
+          typename Index >
+template< typename Vector >
+__cuda_callable__
+typename Vector::RealType Tridiagonal< Real, Device, Index >::rowVectorProduct( const IndexType row,
+                                                                                         const Vector& vector ) const
+{
+   return TridiagonalDeviceDependentCode< Device >::
+             rowVectorProduct( this->rows,
+                               this->values,
+                               row,
+                               vector );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename InVector,
+             typename OutVector >
+void Tridiagonal< Real, Device, Index >::vectorProduct( const InVector& inVector,
+                                                                 OutVector& outVector ) const
+{
+   TNL_ASSERT( this->getColumns() == inVector.getSize(),
+            std::cerr << "Matrix columns: " << this->getColumns() << std::endl
+                 << "Vector size: " << inVector.getSize() << std::endl );
+   TNL_ASSERT( this->getRows() == outVector.getSize(),
+               std::cerr << "Matrix rows: " << this->getRows() << std::endl
+                    << "Vector size: " << outVector.getSize() << std::endl );
+
+   DeviceDependentCode::vectorProduct( *this, inVector, outVector );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Real2, typename Index2 >
+void Tridiagonal< Real, Device, Index >::addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix,
+                                                    const RealType& matrixMultiplicator,
+                                                    const RealType& thisMatrixMultiplicator )
+{
+   TNL_ASSERT( this->getRows() == matrix.getRows(),
+            std::cerr << "This matrix columns: " << this->getColumns() << std::endl
+                 << "This matrix rows: " << this->getRows() << std::endl );
+
+   if( thisMatrixMultiplicator == 1.0 )
+      this->values += matrixMultiplicator * matrix.values;
+   else
+      this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values;
+}
+
+#ifdef HAVE_CUDA
+template< typename Real,
+          typename Real2,
+          typename Index,
+          typename Index2 >
+__global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, Devices::Cuda, Index2 >* inMatrix,
+                                                             Tridiagonal< Real, Devices::Cuda, Index >* outMatrix,
+                                                             const Real matrixMultiplicator,
+                                                             const Index gridIdx )
+{
+   const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   if( rowIdx < inMatrix->getRows() )
+   {
+      if( rowIdx > 0 )
+        outMatrix->setElementFast( rowIdx-1,
+                                   rowIdx,
+                                   matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx-1 ) );
+      outMatrix->setElementFast( rowIdx,
+                                 rowIdx,
+                                 matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx ) );
+      if( rowIdx < inMatrix->getRows()-1 )
+         outMatrix->setElementFast( rowIdx+1,
+                                    rowIdx,
+                                    matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) );
+   }
+}
+#endif
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Real2, typename Index2 >
+void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
+                                                                    const RealType& matrixMultiplicator )
+{
+   TNL_ASSERT( this->getRows() == matrix.getRows(),
+               std::cerr << "This matrix rows: " << this->getRows() << std::endl
+                    << "That matrix rows: " << matrix.getRows() << std::endl );
+   if( std::is_same< Device, Devices::Host >::value )
+   {
+      const IndexType& rows = matrix.getRows();
+      for( IndexType i = 1; i < rows; i++ )
+      {
+         RealType aux = matrix. getElement( i, i - 1 );
+         this->setElement( i, i - 1, matrix.getElement( i - 1, i ) );
+         this->setElement( i, i, matrix.getElement( i, i ) );
+         this->setElement( i - 1, i, aux );
+      }
+   }
+   if( std::is_same< Device, Devices::Cuda >::value )
+   {
+#ifdef HAVE_CUDA
+      Tridiagonal* kernel_this = Cuda::passToDevice( *this );
+      typedef  Tridiagonal< Real2, Device, Index2 > InMatrixType;
+      InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix );
+      dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
+      const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
+      const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
+      for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
+      {
+         if( gridIdx == cudaGrids - 1 )
+            cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
+         TridiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>>
+                                                    ( kernel_inMatrix,
+                                                      kernel_this,
+                                                      matrixMultiplicator,
+                                                      gridIdx );
+      }
+      Cuda::freeFromDevice( kernel_this );
+      Cuda::freeFromDevice( kernel_inMatrix );
+      TNL_CHECK_CUDA_DEVICE;
+#endif
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Vector1, typename Vector2 >
+__cuda_callable__
+void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector1& b,
+                                                              const IndexType row,
+                                                              Vector2& x,
+                                                              const RealType& omega ) const
+{
+   RealType sum( 0.0 );
+   if( row > 0 )
+      sum += this->getElementFast( row, row - 1 ) * x[ row - 1 ];
+   if( row < this->getColumns() - 1 )
+      sum += this->getElementFast( row, row + 1 ) * x[ row + 1 ];
+   x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / this->getElementFast( row, row ) * ( b[ row ] - sum );
+}
+
+
+// copy assignment
+template< typename Real,
+          typename Device,
+          typename Index >
+Tridiagonal< Real, Device, Index >&
+Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal& matrix )
+{
+   this->setLike( matrix );
+   this->values = matrix.values;
+   return *this;
+}
+
+// cross-device copy assignment
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Real2, typename Device2, typename Index2, typename >
+Tridiagonal< Real, Device, Index >&
+Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix )
+{
+   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
+                  "unknown device" );
+   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
+                  "unknown device" );
+
+   this->setLike( matrix );
+
+   throw Exceptions::NotImplementedError("Cross-device assignment for the Tridiagonal format is not implemented yet.");
+}
+
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::save( File& file ) const
+{
+   Matrix< Real, Device, Index >::save( file );
+   file << this->values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::load( File& file )
+{
+   Matrix< Real, Device, Index >::load( file );
+   file >> this->values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::save( const String& fileName ) const
+{
+   Object::save( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::load( const String& fileName )
+{
+   Object::load( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::print( std::ostream& str ) const
+{
+   for( IndexType row = 0; row < this->getRows(); row++ )
+   {
+      str <<"Row: " << row << " -> ";
+      for( IndexType column = row - 1; column < row + 2; column++ )
+         if( column >= 0 && column < this->columns )
+            str << " Col:" << column << "->" << this->getElement( row, column ) << "\t";
+      str << std::endl;
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Index Tridiagonal< Real, Device, Index >::getElementIndex( const IndexType row,
+                                                                    const IndexType column ) const
+{
+   TNL_ASSERT( row >= 0 && column >= 0 && row < this->rows && column < this->rows,
+              std::cerr << " this->rows = " << this->rows
+                   << " row = " << row << " column = " << column );
+   TNL_ASSERT( abs( row - column ) < 2,
+              std::cerr << "row = " << row << " column = " << column << std::endl );
+   return TridiagonalDeviceDependentCode< Device >::getElementIndex( this->rows, row, column );
+}
+
+template<>
+class TridiagonalDeviceDependentCode< Devices::Host >
+{
+   public:
+
+      typedef Devices::Host Device;
+
+      template< typename Index >
+      __cuda_callable__
+      static Index getElementIndex( const Index rows,
+                                    const Index row,
+                                    const Index column )
+      {
+         return 2*row + column;
+      }
+
+      template< typename Vector,
+                typename Index,
+                typename ValuesType  >
+      __cuda_callable__
+      static typename Vector::RealType rowVectorProduct( const Index rows,
+                                                         const ValuesType& values,
+                                                         const Index row,
+                                                         const Vector& vector )
+      {
+         if( row == 0 )
+            return vector[ 0 ] * values[ 0 ] +
+                   vector[ 1 ] * values[ 1 ];
+         Index i = 3 * row;
+         if( row == rows - 1 )
+            return vector[ row - 1 ] * values[ i - 1 ] +
+                   vector[ row ] * values[ i ];
+         return vector[ row - 1 ] * values[ i - 1 ] +
+                vector[ row ] * values[ i ] +
+                vector[ row + 1 ] * values[ i + 1 ];
+      }
+
+      template< typename Real,
+                typename Index,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+#ifdef HAVE_OPENMP
+#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
+#endif
+         for( Index row = 0; row < matrix.getRows(); row ++ )
+            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
+      }
+};
+
+template<>
+class TridiagonalDeviceDependentCode< Devices::Cuda >
+{
+   public:
+ 
+      typedef Devices::Cuda Device;
+
+      template< typename Index >
+      __cuda_callable__
+      static Index getElementIndex( const Index rows,
+                                    const Index row,
+                                    const Index column )
+      {
+         return ( column - row + 1 )*rows + row - 1;
+      }
+
+      template< typename Vector,
+                typename Index,
+                typename ValuesType >
+      __cuda_callable__
+      static typename Vector::RealType rowVectorProduct( const Index rows,
+                                                         const ValuesType& values,
+                                                         const Index row,
+                                                         const Vector& vector )
+      {
+         if( row == 0 )
+            return vector[ 0 ] * values[ 0 ] +
+                   vector[ 1 ] * values[ rows - 1 ];
+         Index i = row - 1;
+         if( row == rows - 1 )
+            return vector[ row - 1 ] * values[ i ] +
+                   vector[ row ] * values[ i + rows ];
+         return vector[ row - 1 ] * values[ i ] +
+                vector[ row ] * values[ i + rows ] +
+                vector[ row + 1 ] * values[ i + 2*rows ];
+      }
+
+      template< typename Real,
+                typename Index,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+         MatrixVectorProductCuda( matrix, inVector, outVector );
+      }
+};
+
+} // namespace Matrices
+} // namespace TNL
-- 
GitLab


From 97888bf1ec2d2f97c67a9ff25d92b1957a66d526 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 8 Jan 2020 18:28:18 +0100
Subject: [PATCH 077/179] Added tridiagonal matrix unit tests to
 CMakeLists.txt.

---
 src/UnitTests/Matrices/CMakeLists.txt | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index e4616f23b..333dee952 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -10,8 +10,9 @@ IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
-   ####
-   # Segments tests
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
+
    CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} )
 
@@ -34,8 +35,10 @@ ELSE(  BUILD_CUDA )
    TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
-   ####
-   # Segments tests
+   ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cpp )
+   TARGET_COMPILE_OPTIONS( TridiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
+
    ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cpp )
    TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR_segments PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} )
@@ -53,12 +56,9 @@ ENDIF( BUILD_CUDA )
 
 ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
-# TODO: Uncomment the following when AdEllpack works
-#ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( TridiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 
-####
-# Segments tests
 ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest_Ellpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest_SlicedEllpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack_segments${CMAKE_EXECUTABLE_SUFFIX} )
-- 
GitLab


From b3d86f895ee6e994909b719a08705b0673f84011 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 8 Jan 2020 18:28:57 +0100
Subject: [PATCH 078/179] Updating API of tridiagonal matrix.

---
 src/TNL/Matrices/Tridiagonal.h   | 246 ++++++++--------
 src/TNL/Matrices/Tridiagonal.hpp | 484 ++++++++++++++++---------------
 2 files changed, 366 insertions(+), 364 deletions(-)

diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h
index 3f57fe1c3..f80bc4c18 100644
--- a/src/TNL/Matrices/Tridiagonal.h
+++ b/src/TNL/Matrices/Tridiagonal.h
@@ -13,197 +13,179 @@
 #include <TNL/Matrices/Matrix.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Matrices/TridiagonalRow.h>
+#include <TNL/Containers/Segments/Ellpack.h>
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 template< typename Device >
 class TridiagonalDeviceDependentCode;
 
 template< typename Real = double,
           typename Device = Devices::Host,
-          typename Index = int >
-class Tridiagonal : public Matrix< Real, Device, Index >
+          typename Index = int,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
+class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
 {
-private:
-   // convenient template alias for controlling the selection of copy-assignment operator
-   template< typename Device2 >
-   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
+   private:
+      // convenient template alias for controlling the selection of copy-assignment operator
+      template< typename Device2 >
+      using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
 
-   // friend class will be needed for templated assignment operators
-   template< typename Real2, typename Device2, typename Index2 >
-   friend class Tridiagonal;
+      // friend class will be needed for templated assignment operators
+      template< typename Real2, typename Device2, typename Index2 >
+      friend class Tridiagonal;
 
-public:
-   typedef Real RealType;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-   typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector;
-   typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
-   typedef Matrix< Real, Device, Index > BaseType;
-   typedef TridiagonalRow< Real, Index > MatrixRow;
+   public:
+      using RealType = Real;
+      using DeviceType = Device;
+      using IndexType = Index;
+      using RealAllocatorType = RealAllocator;
+      using BaseType = Matrix< Real, Device, Index, RealAllocator >;
+      using ValuesType = typename BaseType::ValuesVector;
+      using ValuesViewType = typename ValuesType::ViewType;
+      //using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >;
+      //using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
+      using RowView = TridiagonalMatrixRowView< SegmentViewType, ValuesViewType >;
 
-   template< typename _Real = Real,
-             typename _Device = Device,
-             typename _Index = Index >
-   using Self = Tridiagonal< _Real, _Device, _Index >;
 
-   Tridiagonal();
+      template< typename _Real = Real,
+                typename _Device = Device,
+                typename _Index = Index >
+      using Self = Tridiagonal< _Real, _Device, _Index >;
 
-   static String getSerializationType();
+      Tridiagonal();
 
-   virtual String getSerializationTypeVirtual() const;
+      Tridiagonal( const IndexType rows, const IndexType columns );
 
-   void setDimensions( const IndexType rows,
-                       const IndexType columns );
+      ViewType getView();
 
-   void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
+      ConstViewType getConstView() const;
 
-   IndexType getRowLength( const IndexType row ) const;
+      static String getSerializationType();
 
-   __cuda_callable__
-   IndexType getRowLengthFast( const IndexType row ) const;
+      virtual String getSerializationTypeVirtual() const;
 
-   IndexType getMaxRowLength() const;
+      void setDimensions( const IndexType rows,
+                          const IndexType columns );
 
-   template< typename Real2, typename Device2, typename Index2 >
-   void setLike( const Tridiagonal< Real2, Device2, Index2 >& m );
+      void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
-   IndexType getNumberOfMatrixElements() const;
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
 
-   IndexType getNumberOfNonzeroMatrixElements() const;
+      [[deprecated]]
+      IndexType getRowLength( const IndexType row ) const;
 
-   IndexType getMaxRowlength() const;
+      IndexType getMaxRowLength() const;
 
-   void reset();
+      template< typename Real2, typename Device2, typename Index2 >
+      void setLike( const Tridiagonal< Real2, Device2, Index2 >& m );
 
-   template< typename Real2, typename Device2, typename Index2 >
-   bool operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const;
+      IndexType getNumberOfMatrixElements() const;
 
-   template< typename Real2, typename Device2, typename Index2 >
-   bool operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const;
+      IndexType getNumberOfNonzeroMatrixElements() const;
 
-   void setValue( const RealType& v );
+      IndexType getMaxRowlength() const;
 
-   __cuda_callable__
-   bool setElementFast( const IndexType row,
-                        const IndexType column,
-                        const RealType& value );
+      void reset();
 
-   bool setElement( const IndexType row,
-                    const IndexType column,
-                    const RealType& value );
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      bool operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;
 
-   __cuda_callable__
-   bool addElementFast( const IndexType row,
-                        const IndexType column,
-                        const RealType& value,
-                        const RealType& thisElementMultiplicator = 1.0 );
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      bool operator != ( const Tridiagonal< Real_, Device_, Index_ >& matrix ) const;
 
-   bool addElement( const IndexType row,
-                    const IndexType column,
-                    const RealType& value,
-                    const RealType& thisElementMultiplicator = 1.0 );
+      void setValue( const RealType& v );
 
-   __cuda_callable__
-   bool setRowFast( const IndexType row,
-                    const IndexType* columns,
-                    const RealType* values,
-                    const IndexType elements );
+      bool setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
 
-   bool setRow( const IndexType row,
-                const IndexType* columns,
-                const RealType* values,
-                const IndexType elements );
+      bool addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator = 1.0 );
 
-   __cuda_callable__
-   bool addRowFast( const IndexType row,
-                    const IndexType* columns,
-                    const RealType* values,
-                    const IndexType elements,
-                    const RealType& thisRowMultiplicator = 1.0 );
+      RealType getElement( const IndexType row,
+                           const IndexType column ) const;
 
-   bool addRow( const IndexType row,
-                const IndexType* columns,
-                const RealType* values,
-                const IndexType elements,
-                const RealType& thisRowMultiplicator = 1.0 );
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
-   __cuda_callable__
-   RealType getElementFast( const IndexType row,
-                            const IndexType column ) const;
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
-   RealType getElement( const IndexType row,
-                        const IndexType column ) const;
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
 
-   __cuda_callable__
-   void getRowFast( const IndexType row,
-                    IndexType* columns,
-                    RealType* values ) const;
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
 
-   __cuda_callable__
-   MatrixRow getRow( const IndexType rowIndex );
+      template< typename Function >
+      void forAllRows( Function& function ) const;
 
-   __cuda_callable__
-   const MatrixRow getRow( const IndexType rowIndex ) const;
+      template< typename Function >
+      void forAllRows( Function& function );
 
-   template< typename Vector >
-   __cuda_callable__
-   typename Vector::RealType rowVectorProduct( const IndexType row,
-                                               const Vector& vector ) const;
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
 
-   template< typename InVector,
-             typename OutVector >
-   void vectorProduct( const InVector& inVector,
-                       OutVector& outVector ) const;
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector ) const;
 
-   template< typename Real2, typename Index2 >
-   void addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix,
-                   const RealType& matrixMultiplicator = 1.0,
-                   const RealType& thisMatrixMultiplicator = 1.0 );
+      template< typename Real2, typename Index2 >
+      void addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix,
+                      const RealType& matrixMultiplicator = 1.0,
+                      const RealType& thisMatrixMultiplicator = 1.0 );
 
-   template< typename Real2, typename Index2 >
-   void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
-                          const RealType& matrixMultiplicator = 1.0 );
+      template< typename Real2, typename Index2 >
+      void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
+                             const RealType& matrixMultiplicator = 1.0 );
 
-   template< typename Vector1, typename Vector2 >
-   __cuda_callable__
-   void performSORIteration( const Vector1& b,
-                             const IndexType row,
-                             Vector2& x,
-                             const RealType& omega = 1.0 ) const;
+      template< typename Vector1, typename Vector2 >
+      __cuda_callable__
+      void performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
 
-   // copy assignment
-   Tridiagonal& operator=( const Tridiagonal& matrix );
+      // copy assignment
+      Tridiagonal& operator=( const Tridiagonal& matrix );
 
-   // cross-device copy assignment
-   template< typename Real2, typename Device2, typename Index2,
-             typename = typename Enabler< Device2 >::type >
-   Tridiagonal& operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix );
+      // cross-device copy assignment
+      template< typename Real2, typename Device2, typename Index2,
+                typename = typename Enabler< Device2 >::type >
+      Tridiagonal& operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix );
 
-   void save( File& file ) const;
+      void save( File& file ) const;
 
-   void load( File& file );
+      void load( File& file );
 
-   void save( const String& fileName ) const;
+      void save( const String& fileName ) const;
 
-   void load( const String& fileName );
+      void load( const String& fileName );
 
-   void print( std::ostream& str ) const;
+      void print( std::ostream& str ) const;
 
-protected:
+   protected:
 
-   __cuda_callable__
-   IndexType getElementIndex( const IndexType row,
-                              const IndexType column ) const;
+      __cuda_callable__
+      IndexType getElementIndex( const IndexType row,
+                                 const IndexType column ) const;
 
-   Containers::Vector< RealType, DeviceType, IndexType > values;
+      Containers::Vector< RealType, DeviceType, IndexType > values;
 
-   typedef TridiagonalDeviceDependentCode< DeviceType > DeviceDependentCode;
-   friend class TridiagonalDeviceDependentCode< DeviceType >;
+      typedef TridiagonalDeviceDependentCode< DeviceType > DeviceDependentCode;
+      friend class TridiagonalDeviceDependentCode< DeviceType >;
 };
 
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/Tridiagonal_impl.h>
+#include <TNL/Matrices/Tridiagonal.hpp>
diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp
index 2752f6850..c36edec0b 100644
--- a/src/TNL/Matrices/Tridiagonal.hpp
+++ b/src/TNL/Matrices/Tridiagonal.hpp
@@ -15,74 +15,81 @@
 #include <TNL/Exceptions/NotImplementedError.h>
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 template< typename Device >
 class TridiagonalDeviceDependentCode;
 
 template< typename Real,
           typename Device,
-          typename Index >
-Tridiagonal< Real, Device, Index >::Tridiagonal()
-{
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String Tridiagonal< Real, Device, Index >::getType()
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+Tridiagonal()
 {
-   return String( "Matrices::Tridiagonal< " ) +
-          String( TNL::getType< Real >() ) +
-          String( ", " ) +
-          String( Device :: getDeviceType() ) +
-          String( ", " ) +
-          String( TNL::getType< Index >() ) +
-          String( " >" );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-String Tridiagonal< Real, Device, Index >::getTypeVirtual() const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+Tridiagonal( const IndexType rows, const IndexType columns )
 {
-   return this->getType();
+   this->setDimensions( rows, columns );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-String Tridiagonal< Real, Device, Index >::getSerializationType()
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+String
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getSerializationType()
 {
    return String( "Matrices::Tridiagonal< " ) +
-          getType< RealType >() + ", " +
-          getType< Device >() + ", " +
-          getType< IndexType >() + " >";
+          TNL::getSerializationType< RealType >() + ", [any_device], " +
+          TNL::getSerializationType< IndexType >() + ", " +
+          ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >";
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-String Tridiagonal< Real, Device, Index >::getSerializationTypeVirtual() const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+String
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getSerializationTypeVirtual() const
 {
    return this->getSerializationType();
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::setDimensions( const IndexType rows,
-                                                        const IndexType columns )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setDimensions( const IndexType rows, const IndexType columns )
 {
    Matrix< Real, Device, Index >::setDimensions( rows, columns );
    values.setSize( 3*min( rows, columns ) );
-   this->values.setValue( 0.0 );
+   this->values = 0.0;
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
 {
    if( rowLengths[ 0 ] > 2 )
       throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
@@ -103,17 +110,12 @@ void Tridiagonal< Real, Device, Index >::setCompressedRowLengths( ConstCompresse
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Tridiagonal< Real, Device, Index >::getRowLength( const IndexType row ) const
-{
-   return this->getRowLengthFast( row );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getRowLength( const IndexType row ) const
 {
    const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
    if( row == 0 )
@@ -129,46 +131,64 @@ Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Tridiagonal< Real, Device, Index >::getMaxRowLength() const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getMaxRowLength() const
 {
    return 3;
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Real2, typename Device2, typename Index2 >
-void Tridiagonal< Real, Device, Index >::setLike( const Tridiagonal< Real2, Device2, Index2 >& m )
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setLike( const Tridiagonal< Real2, Device2, Index2 >& m )
 {
    this->setDimensions( m.getRows(), m.getColumns() );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Tridiagonal< Real, Device, Index >::getNumberOfMatrixElements() const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getNumberOfMatrixElements() const
 {
    return 3 * min( this->getRows(), this->getColumns() );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Tridiagonal< Real, Device, Index > :: getNumberOfNonzeroMatrixElements() const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getNumberOfNonzeroMatrixElements() const
 {
-   IndexType nonzeroElements = 0;
-   for( IndexType i = 0; i < this->values.getSize(); i++ )
-      if( this->values.getElement( i ) != 0 )
-         nonzeroElements++;
-   return nonzeroElements;
+   const auto values_view = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+      return ( values_view[ i ] != 0.0 );
+   };
+   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 Index
-Tridiagonal< Real, Device, Index >::
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getMaxRowlength() const
 {
    return 3;
@@ -176,8 +196,12 @@ getMaxRowlength() const
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::reset()
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+reset()
 {
    Matrix< Real, Device, Index >::reset();
    this->values.reset();
@@ -185,48 +209,55 @@ void Tridiagonal< Real, Device, Index >::reset()
 
 template< typename Real,
           typename Device,
-          typename Index >
-   template< typename Real2, typename Device2, typename Index2 >
-bool Tridiagonal< Real, Device, Index >::operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const
-{
-   return this->values == matrix.values;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename Real2, typename Device2, typename Index2 >
-bool Tridiagonal< Real, Device, Index >::operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const
-{
-   return this->values != matrix.values;
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+bool
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const
+{
+   if( RowMajorOrder == RowMajorOrder_ )
+      return this->values == matrix.values;
+   else
+   {
+      TNL_ASSERT( false, "TODO" );
+   }
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::setValue( const RealType& v )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+bool
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator != ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const
 {
-   this->values.setValue( v );
+   return ! this->operator==( matrix );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-bool Tridiagonal< Real, Device, Index >::setElementFast( const IndexType row,
-                                                                  const IndexType column,
-                                                                  const RealType& value )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setValue( const RealType& v )
 {
-   this->values[ this->getElementIndex( row, column ) ] = value;
-   return true;
+   this->values = v;
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Tridiagonal< Real, Device, Index >::setElement( const IndexType row,
-                                                              const IndexType column,
-                                                              const RealType& value )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+bool
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setElement( const IndexType row, const IndexType column, const RealType& value )
 {
    this->values.setElement( this->getElementIndex( row, column ), value );
    return true;
@@ -234,159 +265,120 @@ bool Tridiagonal< Real, Device, Index >::setElement( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-bool Tridiagonal< Real, Device, Index >::addElementFast( const IndexType row,
-                                                                  const IndexType column,
-                                                                  const RealType& value,
-                                                                  const RealType& thisElementMultiplicator )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+bool
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+addElement( const IndexType row,
+            const IndexType column,
+            const RealType& value,
+            const RealType& thisElementMultiplicator )
 {
    const Index i = this->getElementIndex( row, column );
-   this->values[ i ] = thisElementMultiplicator*this->values[ i ] + value;
+   this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value );
    return true;
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Tridiagonal< Real, Device, Index >::addElement( const IndexType row,
-                                                              const IndexType column,
-                                                              const RealType& value,
-                                                              const RealType& thisElementMultiplicator )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Real
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getElement( const IndexType row, const IndexType column ) const
 {
-   const Index i = this->getElementIndex( row, column );
-   this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value );
-   return true;
+   if( abs( column - row ) > 1 )
+      return 0.0;
+   return this->values.getElement( this->getElementIndex( row, column ) );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-bool Tridiagonal< Real, Device, Index >::setRowFast( const IndexType row,
-                                                              const IndexType* columns,
-                                                              const RealType* values,
-                                                              const IndexType elements )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   return this->addRowFast( row, columns, values, elements, 0.0 );
+
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Tridiagonal< Real, Device, Index >::setRow( const IndexType row,
-                                                          const IndexType* columns,
-                                                          const RealType* values,
-                                                          const IndexType elements )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   return this->addRow( row, columns, values, elements, 0.0 );
-}
 
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-bool Tridiagonal< Real, Device, Index >::addRowFast( const IndexType row,
-                                                              const IndexType* columns,
-                                                              const RealType* values,
-                                                              const IndexType elements,
-                                                              const RealType& thisRowMultiplicator )
-{
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   if( elements > 3 )
-      return false;
-   for( IndexType i = 0; i < elements; i++ )
-   {
-      const IndexType& column = columns[ i ];
-      if( column < row - 1 || column > row + 1 )
-         return false;
-      addElementFast( row, column, values[ i ], thisRowMultiplicator );
-   }
-   return true;
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Tridiagonal< Real, Device, Index >::addRow( const IndexType row,
-                                                          const IndexType* columns,
-                                                          const RealType* values,
-                                                          const IndexType elements,
-                                                          const RealType& thisRowMultiplicator )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forRows( IndexType first, IndexType last, Function& function ) const
 {
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   if( elements > 3 )
-      return false;
-   for( IndexType i = 0; i < elements; i++ )
-   {
-      const IndexType column = columns[ i ];
-      if( column < row - 1 || column > row + 1 )
-         return false;
-      addElement( row, column, values[ i ], thisRowMultiplicator );
-   }
-   return true;
+
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-Real Tridiagonal< Real, Device, Index >::getElementFast( const IndexType row,
-                                                                  const IndexType column ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+  template< typename Function >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forRows( IndexType first, IndexType last, Function& function )
 {
-   if( abs( column - row ) > 1 )
-      return 0.0;
-   return this->values[ this->getElementIndex( row, column ) ];
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Real Tridiagonal< Real, Device, Index >::getElement( const IndexType row,
-                                                              const IndexType column ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forAllRows( Function& function ) const
 {
-   if( abs( column - row ) > 1 )
-      return 0.0;
-   return this->values.getElement( this->getElementIndex( row, column ) );
+
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-void Tridiagonal< Real, Device, Index >::getRowFast( const IndexType row,
-                                                              IndexType* columns,
-                                                              RealType* values ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forAllRows( Function& function )
 {
-   IndexType elementPointer( 0 );
-   for( IndexType i = -1; i <= 1; i++ )
-   {
-      const IndexType column = row + 1;
-      if( column >= 0 && column < this->getColumns() )
-      {
-         columns[ elementPointer ] = column;
-         values[ elementPointer ] = this->values[ this->getElementIndex( row, column ) ];
-         elementPointer++;
-      }
-   }
+
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-typename Tridiagonal< Real, Device, Index >::MatrixRow
-Tridiagonal< Real, Device, Index >::
+typename Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getRow( const IndexType rowIndex )
 {
    if( std::is_same< Device, Devices::Host >::value )
@@ -403,10 +395,12 @@ getRow( const IndexType rowIndex )
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-const typename Tridiagonal< Real, Device, Index >::MatrixRow
-Tridiagonal< Real, Device, Index >::
+const typename Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getRow( const IndexType rowIndex ) const
 {
    throw Exceptions::NotImplementedError();
@@ -415,10 +409,12 @@ getRow( const IndexType rowIndex ) const
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 template< typename Vector >
 __cuda_callable__
-typename Vector::RealType Tridiagonal< Real, Device, Index >::rowVectorProduct( const IndexType row,
+typename Vector::RealType Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::rowVectorProduct( const IndexType row,
                                                                                          const Vector& vector ) const
 {
    return TridiagonalDeviceDependentCode< Device >::
@@ -430,10 +426,12 @@ typename Vector::RealType Tridiagonal< Real, Device, Index >::rowVectorProduct(
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename InVector,
              typename OutVector >
-void Tridiagonal< Real, Device, Index >::vectorProduct( const InVector& inVector,
+void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct( const InVector& inVector,
                                                                  OutVector& outVector ) const
 {
    TNL_ASSERT( this->getColumns() == inVector.getSize(),
@@ -448,9 +446,11 @@ void Tridiagonal< Real, Device, Index >::vectorProduct( const InVector& inVector
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Real2, typename Index2 >
-void Tridiagonal< Real, Device, Index >::addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix,
+void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix,
                                                     const RealType& matrixMultiplicator,
                                                     const RealType& thisMatrixMultiplicator )
 {
@@ -494,9 +494,11 @@ __global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, De
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Real2, typename Index2 >
-void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
+void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
                                                                     const RealType& matrixMultiplicator )
 {
    TNL_ASSERT( this->getRows() == matrix.getRows(),
@@ -541,10 +543,12 @@ void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Re
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Vector1, typename Vector2 >
 __cuda_callable__
-void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector1& b,
+void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORIteration( const Vector1& b,
                                                               const IndexType row,
                                                               Vector2& x,
                                                               const RealType& omega ) const
@@ -561,9 +565,11 @@ void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector1& b,
 // copy assignment
 template< typename Real,
           typename Device,
-          typename Index >
-Tridiagonal< Real, Device, Index >&
-Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal& matrix )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >&
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Tridiagonal& matrix )
 {
    this->setLike( matrix );
    this->values = matrix.values;
@@ -573,10 +579,12 @@ Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal& matrix )
 // cross-device copy assignment
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Real2, typename Device2, typename Index2, typename >
-Tridiagonal< Real, Device, Index >&
-Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix )
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >&
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix )
 {
    static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
                   "unknown device" );
@@ -591,8 +599,10 @@ Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal< Real2, Device2
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::save( File& file ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::save( File& file ) const
 {
    Matrix< Real, Device, Index >::save( file );
    file << this->values;
@@ -600,8 +610,10 @@ void Tridiagonal< Real, Device, Index >::save( File& file ) const
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::load( File& file )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( File& file )
 {
    Matrix< Real, Device, Index >::load( file );
    file >> this->values;
@@ -609,24 +621,30 @@ void Tridiagonal< Real, Device, Index >::load( File& file )
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::save( const String& fileName ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::save( const String& fileName ) const
 {
    Object::save( fileName );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::load( const String& fileName )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( const String& fileName )
 {
    Object::load( fileName );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::print( std::ostream& str ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::print( std::ostream& str ) const
 {
    for( IndexType row = 0; row < this->getRows(); row++ )
    {
@@ -640,9 +658,11 @@ void Tridiagonal< Real, Device, Index >::print( std::ostream& str ) const
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-Index Tridiagonal< Real, Device, Index >::getElementIndex( const IndexType row,
+Index Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementIndex( const IndexType row,
                                                                     const IndexType column ) const
 {
    TNL_ASSERT( row >= 0 && column >= 0 && row < this->rows && column < this->rows,
@@ -694,7 +714,7 @@ class TridiagonalDeviceDependentCode< Devices::Host >
                 typename Index,
                 typename InVector,
                 typename OutVector >
-      static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix,
+      static void vectorProduct( const Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix,
                                  const InVector& inVector,
                                  OutVector& outVector )
       {
@@ -710,7 +730,7 @@ template<>
 class TridiagonalDeviceDependentCode< Devices::Cuda >
 {
    public:
- 
+
       typedef Devices::Cuda Device;
 
       template< typename Index >
@@ -747,7 +767,7 @@ class TridiagonalDeviceDependentCode< Devices::Cuda >
                 typename Index,
                 typename InVector,
                 typename OutVector >
-      static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix,
+      static void vectorProduct( const Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix,
                                  const InVector& inVector,
                                  OutVector& outVector )
       {
-- 
GitLab


From 9a565126aca84cb100eb7d19b2274b6ff10b2ddd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 9 Jan 2020 22:11:58 +0100
Subject: [PATCH 079/179] Reimplementing tridiagonal matrix.

---
 src/TNL/Matrices/SparseMatrix.hpp             |   1 -
 src/TNL/Matrices/Tridiagonal.h                |  62 +-
 src/TNL/Matrices/Tridiagonal.hpp              | 303 +++++--
 src/TNL/Matrices/TridiagonalMatrixRowView.h   |  59 ++
 src/TNL/Matrices/TridiagonalMatrixRowView.hpp |  75 ++
 src/TNL/Matrices/TridiagonalMatrixView.h      | 255 +++---
 src/TNL/Matrices/TridiagonalMatrixView.hpp    | 798 ++++++++----------
 src/TNL/Matrices/TridiagonalRow.h             |  51 --
 src/TNL/Matrices/TridiagonalRow_impl.h        |  78 --
 .../details/TridiagonalMatrixIndexer.h        |  90 ++
 .../Matrices/TridiagonalMatrixTest.h          |   6 +-
 11 files changed, 964 insertions(+), 814 deletions(-)
 create mode 100644 src/TNL/Matrices/TridiagonalMatrixRowView.h
 create mode 100644 src/TNL/Matrices/TridiagonalMatrixRowView.hpp
 delete mode 100644 src/TNL/Matrices/TridiagonalRow.h
 delete mode 100644 src/TNL/Matrices/TridiagonalRow_impl.h
 create mode 100644 src/TNL/Matrices/details/TridiagonalMatrixIndexer.h

diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 8dbe53f4d..6189d43d3 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -515,7 +515,6 @@ forRows( IndexType first, IndexType last, Function& function )
       return true;
    };
    this->segments.forSegments( first, last, f );
-
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h
index f80bc4c18..51e05c899 100644
--- a/src/TNL/Matrices/Tridiagonal.h
+++ b/src/TNL/Matrices/Tridiagonal.h
@@ -12,15 +12,14 @@
 
 #include <TNL/Matrices/Matrix.h>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Matrices/TridiagonalRow.h>
+#include <TNL/Matrices/TridiagonalMatrixRowView.h>
 #include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Matrices/details/TridiagonalMatrixIndexer.h>
+#include <TNL/Matrices/TridiagonalMatrixView.h>
 
 namespace TNL {
 namespace Matrices {
 
-template< typename Device >
-class TridiagonalDeviceDependentCode;
-
 template< typename Real = double,
           typename Device = Devices::Host,
           typename Index = int,
@@ -28,27 +27,23 @@ template< typename Real = double,
           typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
 class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
 {
-   private:
-      // convenient template alias for controlling the selection of copy-assignment operator
-      template< typename Device2 >
-      using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
-
-      // friend class will be needed for templated assignment operators
-      template< typename Real2, typename Device2, typename Index2 >
-      friend class Tridiagonal;
-
    public:
       using RealType = Real;
       using DeviceType = Device;
       using IndexType = Index;
       using RealAllocatorType = RealAllocator;
       using BaseType = Matrix< Real, Device, Index, RealAllocator >;
+      using IndexerType = details::TridiagonalMatrixIndexer< IndexType, RowMajorOrder >;
       using ValuesType = typename BaseType::ValuesVector;
       using ValuesViewType = typename ValuesType::ViewType;
-      //using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >;
-      //using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
-      using RowView = TridiagonalMatrixRowView< SegmentViewType, ValuesViewType >;
+      using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >;
+      using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
+      using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >;
 
+      // TODO: remove this - it is here only for compatibility with original matrix implementation
+      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
+      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
+      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
 
       template< typename _Real = Real,
                 typename _Device = Device,
@@ -70,7 +65,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
       void setDimensions( const IndexType rows,
                           const IndexType columns );
 
-      void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
+      //template< typename Vector >
+      void setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowCapacities );
 
       template< typename Vector >
       void getCompressedRowLengths( Vector& rowLengths ) const;
@@ -80,8 +76,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
 
       IndexType getMaxRowLength() const;
 
-      template< typename Real2, typename Device2, typename Index2 >
-      void setLike( const Tridiagonal< Real2, Device2, Index2 >& m );
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      void setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m );
 
       IndexType getNumberOfMatrixElements() const;
 
@@ -91,11 +87,15 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
 
       void reset();
 
-      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
-      bool operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      bool operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const;
+
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      bool operator != ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const;
 
-      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
-      bool operator != ( const Tridiagonal< Real_, Device_, Index_ >& matrix ) const;
+      RowView getRow( const IndexType& rowIdx );
+
+      const RowView getRow( const IndexType& rowIdx ) const;
 
       void setValue( const RealType& v );
 
@@ -139,8 +139,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
       void vectorProduct( const InVector& inVector,
                           OutVector& outVector ) const;
 
-      template< typename Real2, typename Index2 >
-      void addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix,
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      void addMatrix( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix,
                       const RealType& matrixMultiplicator = 1.0,
                       const RealType& thisMatrixMultiplicator = 1.0 );
 
@@ -159,9 +159,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
       Tridiagonal& operator=( const Tridiagonal& matrix );
 
       // cross-device copy assignment
-      template< typename Real2, typename Device2, typename Index2,
-                typename = typename Enabler< Device2 >::type >
-      Tridiagonal& operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix );
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      Tridiagonal& operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix );
 
       void save( File& file ) const;
 
@@ -177,12 +176,9 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
 
       __cuda_callable__
       IndexType getElementIndex( const IndexType row,
-                                 const IndexType column ) const;
-
-      Containers::Vector< RealType, DeviceType, IndexType > values;
+                                 const IndexType localIdx ) const;
 
-      typedef TridiagonalDeviceDependentCode< DeviceType > DeviceDependentCode;
-      friend class TridiagonalDeviceDependentCode< DeviceType >;
+      IndexerType indexer;
 };
 
 } // namespace Matrices
diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp
index c36edec0b..a7178f86e 100644
--- a/src/TNL/Matrices/Tridiagonal.hpp
+++ b/src/TNL/Matrices/Tridiagonal.hpp
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          Tridiagonal_impl.h  -  description
+                          Tridiagonal.hpp  -  description
                              -------------------
     begin                : Nov 30, 2013
     copyright            : (C) 2013 by Tomas Oberhuber
@@ -41,6 +41,30 @@ Tridiagonal( const IndexType rows, const IndexType columns )
    this->setDimensions( rows, columns );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+auto
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getView() -> ViewType
+{
+   return ViewType( this->values.getView(), indexer );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+auto
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getConstView() const -> ConstViewType
+{
+   return ConstViewType( this->values.getConstView(), indexer );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -78,7 +102,8 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 setDimensions( const IndexType rows, const IndexType columns )
 {
    Matrix< Real, Device, Index >::setDimensions( rows, columns );
-   values.setSize( 3*min( rows, columns ) );
+   this->indexer.setDimensions( rows, columns );
+   this->values.setSize( this->indexer.getStorageSize() );
    this->values = 0.0;
 }
 
@@ -87,24 +112,24 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
+ //  template< typename Vector >
 void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
-setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
+setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowLengths )
 {
-   if( rowLengths[ 0 ] > 2 )
+   if( max( rowLengths ) > 3 )
+      throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   if( rowLengths.getElement( 0 ) > 2 )
       throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
    const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
-   for( Index i = 1; i < diagonalLength-1; i++ )
-      if( rowLengths[ i ] > 3 )
-         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
    if( this->getRows() > this->getColumns() )
-      if( rowLengths[ this->getRows()-1 ] > 1 )
+      if( rowLengths.getElement( this->getRows()-1 ) > 1 )
          throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
    if( this->getRows() == this->getColumns() )
-      if( rowLengths[ this->getRows()-1 ] > 2 )
+      if( rowLengths.getElement( this->getRows()-1 ) > 2 )
          throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
    if( this->getRows() < this->getColumns() )
-      if( rowLengths[ this->getRows()-1 ] > 3 )
+      if( rowLengths.getElement( this->getRows()-1 ) > 3 )
          throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
 }
 
@@ -146,10 +171,10 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-   template< typename Real2, typename Device2, typename Index2 >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
 void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
-setLike( const Tridiagonal< Real2, Device2, Index2 >& m )
+setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m )
 {
    this->setDimensions( m.getRows(), m.getColumns() );
 }
@@ -250,6 +275,32 @@ setValue( const RealType& v )
    this->values = v;
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__
+auto
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getRow( const IndexType& rowIdx ) const -> const RowView
+{
+   return RowView( this->values.getView(), this->indexer );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__
+auto
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getRow( const IndexType& rowIdx ) -> RowView
+{
+   return RowView( this->values.getView(), this->indexer );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -259,6 +310,12 @@ bool
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 setElement( const IndexType row, const IndexType column, const RealType& value )
 {
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+   if( abs( row - column ) > 1 )
+      throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." );
    this->values.setElement( this->getElementIndex( row, column ), value );
    return true;
 }
@@ -275,6 +332,12 @@ addElement( const IndexType row,
             const RealType& value,
             const RealType& thisElementMultiplicator )
 {
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+   if( abs( row - column ) > 1 )
+      throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." );
    const Index i = this->getElementIndex( row, column );
    this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value );
    return true;
@@ -289,6 +352,11 @@ Real
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getElement( const IndexType row, const IndexType column ) const
 {
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+
    if( abs( column - row ) > 1 )
       return 0.0;
    return this->values.getElement( this->getElementIndex( row, column ) );
@@ -304,7 +372,46 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-
+   const auto values_view = this->values.getConstView();
+   const auto indexer_ = this->indexer;
+   const auto rows = this->getRows();
+   const auto columns = this->getColumns();
+   const auto size = this->size;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      //bool compute;
+      if( rowIdx == 0 )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
+         keep( 0, reduce( fetch( 0, 0, i_0, values_view[ i_0 ] ),
+                          fetch( 0, 1, i_1, values_view[ i_1 ] ) ) );
+         return;
+      }
+      if( rowIdx < size || columns > rows )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
+
+         keep( rowIdx, reduce( reduce( fetch( rowIdx, rowIdx - 1, i_0, values_view[ i_0 ] ),
+                                       fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ),
+                               fetch( rowIdx, rowIdx + 1, i_2, values_view[ i_2] ) ) );
+         return;
+      }
+      if( rows == columns )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         keep( rowIdx, reduce( fetch( rowIdx, rowIdx - 1, i_0, values_view[ i_0 ] ),
+                               fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ) );
+      }
+      else
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         keep( rowIdx, fetch( rowIdx, rowIdx, i_0, values_view[ i_0 ] ) );
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
 
 template< typename Real,
@@ -317,7 +424,7 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-
+   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -330,7 +437,45 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 forRows( IndexType first, IndexType last, Function& function ) const
 {
-
+   const auto values_view = this->values.getConstView();
+   const auto indexer_ = this->indexer;
+   const auto rows = this->getRows();
+   const auto columns = this->getColumns();
+   const auto size = this->size;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      //bool compute;
+      if( rowIdx == 0 )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
+         function( 0, 1, rowIdx,     values_view[ i_0 ] );
+         function( 0, 2, rowIdx + 1, values_view[ i_1 ] );
+         return;
+      }
+      if( rowIdx < size || columns > rows )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
+         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
+         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+         function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] );
+         return;
+      }
+      if( rows == columns )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
+         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+      }
+      else
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         function( rowIdx, 0, rowIdx, values_view[ i_0 ] );
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
 
 template< typename Real,
@@ -343,6 +488,45 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 forRows( IndexType first, IndexType last, Function& function )
 {
+   const auto values_view = this->values.getConstView();
+   const auto indexer_ = this->indexer;
+   const auto rows = this->getRows();
+   const auto columns = this->getColumns();
+   const auto size = this->size;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      //bool compute;
+      if( rowIdx == 0 )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
+         function( 0, 1, rowIdx,     values_view[ i_0 ] );
+         function( 0, 2, rowIdx + 1, values_view[ i_1 ] );
+         return;
+      }
+      if( rowIdx < size || columns > rows )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
+         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
+         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+         function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] );
+         return;
+      }
+      if( rows == columns )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
+         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+      }
+      else
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         function( rowIdx, 0, rowIdx, values_view[ i_0 ] );
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
 
 template< typename Real,
@@ -355,7 +539,7 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 forAllRows( Function& function ) const
 {
-
+   this->forRows( 0, this->getRows(), function );
 }
 
 template< typename Real,
@@ -368,45 +552,9 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 forAllRows( Function& function )
 {
-
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
-__cuda_callable__
-typename Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow
-Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
-getRow( const IndexType rowIndex )
-{
-   if( std::is_same< Device, Devices::Host >::value )
-      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ],
-                        rowIndex,
-                        this->getColumns(),
-                        1 );
-   if( std::is_same< Device, Devices::Cuda >::value )
-      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ],
-                        rowIndex,
-                        this->getColumns(),
-                        this->rows );
+   this->forRows( 0, this->getRows(), function );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
-__cuda_callable__
-const typename Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow
-Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
-getRow( const IndexType rowIndex ) const
-{
-   throw Exceptions::NotImplementedError();
-}
-
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -414,8 +562,9 @@ template< typename Real,
           typename RealAllocator >
 template< typename Vector >
 __cuda_callable__
-typename Vector::RealType Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::rowVectorProduct( const IndexType row,
-                                                                                         const Vector& vector ) const
+typename Vector::RealType 
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+rowVectorProduct( const IndexType row, const Vector& vector ) const
 {
    return TridiagonalDeviceDependentCode< Device >::
              rowVectorProduct( this->rows,
@@ -431,8 +580,9 @@ template< typename Real,
           typename RealAllocator >
    template< typename InVector,
              typename OutVector >
-void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct( const InVector& inVector,
-                                                                 OutVector& outVector ) const
+void 
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+vectorProduct( const InVector& inVector, OutVector& outVector ) const
 {
    TNL_ASSERT( this->getColumns() == inVector.getSize(),
             std::cerr << "Matrix columns: " << this->getColumns() << std::endl
@@ -441,7 +591,7 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorPro
                std::cerr << "Matrix rows: " << this->getRows() << std::endl
                     << "Vector size: " << outVector.getSize() << std::endl );
 
-   DeviceDependentCode::vectorProduct( *this, inVector, outVector );
+   //DeviceDependentCode::vectorProduct( *this, inVector, outVector );
 }
 
 template< typename Real,
@@ -449,10 +599,12 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-   template< typename Real2, typename Index2 >
-void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix,
-                                                    const RealType& matrixMultiplicator,
-                                                    const RealType& thisMatrixMultiplicator )
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+addMatrix( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix,
+           const RealType& matrixMultiplicator,
+           const RealType& thisMatrixMultiplicator )
 {
    TNL_ASSERT( this->getRows() == matrix.getRows(),
             std::cerr << "This matrix columns: " << this->getColumns() << std::endl
@@ -582,13 +734,14 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-   template< typename Real2, typename Device2, typename Index2, typename >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >&
-Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix )
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix )
 {
    static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
                   "unknown device" );
-   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
+   static_assert( std::is_same< Device_, Devices::Host >::value || std::is_same< Device_, Devices::Cuda >::value,
                   "unknown device" );
 
    this->setLike( matrix );
@@ -605,7 +758,6 @@ template< typename Real,
 void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::save( File& file ) const
 {
    Matrix< Real, Device, Index >::save( file );
-   file << this->values;
 }
 
 template< typename Real,
@@ -616,7 +768,7 @@ template< typename Real,
 void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( File& file )
 {
    Matrix< Real, Device, Index >::load( file );
-   file >> this->values;
+   this->indexer.setDimensions( this->getRows(), this->getColumns() );
 }
 
 template< typename Real,
@@ -662,17 +814,17 @@ template< typename Real,
           bool RowMajorOrder,
           typename RealAllocator >
 __cuda_callable__
-Index Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementIndex( const IndexType row,
-                                                                    const IndexType column ) const
+Index Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getElementIndex( const IndexType row, const IndexType localIdx ) const
 {
-   TNL_ASSERT( row >= 0 && column >= 0 && row < this->rows && column < this->rows,
-              std::cerr << " this->rows = " << this->rows
-                   << " row = " << row << " column = " << column );
-   TNL_ASSERT( abs( row - column ) < 2,
-              std::cerr << "row = " << row << " column = " << column << std::endl );
-   return TridiagonalDeviceDependentCode< Device >::getElementIndex( this->rows, row, column );
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( localIdx, 0, "" );
+   TNL_ASSERT_LT( localIdx, 3, "" );
+   return this->indexer.getGlobalIndex( row, localIdx );
 }
 
+/*
 template<>
 class TridiagonalDeviceDependentCode< Devices::Host >
 {
@@ -774,6 +926,7 @@ class TridiagonalDeviceDependentCode< Devices::Cuda >
          MatrixVectorProductCuda( matrix, inVector, outVector );
       }
 };
+ */
 
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/TridiagonalMatrixRowView.h b/src/TNL/Matrices/TridiagonalMatrixRowView.h
new file mode 100644
index 000000000..e77d826e0
--- /dev/null
+++ b/src/TNL/Matrices/TridiagonalMatrixRowView.h
@@ -0,0 +1,59 @@
+/***************************************************************************
+                          TridiagonalMatrixRowView.h  -  description
+                             -------------------
+    begin                : Dec 31, 2014
+    copyright            : (C) 2014 by oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+namespace Matrices {   
+
+template< typename ValuesView,
+          typename Indexer >
+class TridiagonalMatrixRowView
+{
+   public:
+
+      using RealType = typename ValuesView::RealType;
+      using IndexType = typename ValuesView::IndexType;
+      using ValuesViewType = ValuesView;
+      using IndexerType = Indexer;
+
+      __cuda_callable__
+      TridiagonalMatrixRowView( const IndexType rowIdx,
+                                const ValuesViewType& values,
+                                const IndexerType& indexer );
+
+      __cuda_callable__
+      IndexType getSize() const;
+
+      __cuda_callable__
+      const IndexType getColumnIndex( const IndexType localIdx ) const;
+
+      __cuda_callable__
+      const RealType& getValue( const IndexType localIdx ) const;
+
+      __cuda_callable__
+      RealType& getValue( const IndexType localIdx );
+
+      __cuda_callable__
+      void setElement( const IndexType localIdx,
+                       const RealType& value );
+   protected:
+
+      IndexType rowIdx;
+
+      ValuesViewType values;
+
+      Indexer indexer;
+};
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/TridiagonalMatrixRowView.hpp>
diff --git a/src/TNL/Matrices/TridiagonalMatrixRowView.hpp b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp
new file mode 100644
index 000000000..ba60876b9
--- /dev/null
+++ b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp
@@ -0,0 +1,75 @@
+/***************************************************************************
+                          TridiagonalMatrixRowView.hpp  -  description
+                             -------------------
+    begin                : Dec 31, 2014
+    copyright            : (C) 2014 by oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+namespace Matrices {   
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+TridiagonalMatrixRowView( const IndexType rowIdx,
+                          const ValuesViewType& values,
+                          const IndexerType& indexer )
+: rowIdx( rowIdx ), values( values ), indexer( indexer )
+{
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+getSize() const -> IndexType
+{
+   return indexer.getRowSize();
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+getColumnIndex( const IndexType localIdx ) const -> const IndexType
+{
+   TNL_ASSERT_GE( localIdx, 0, "" );
+   TNL_ASSERT_LT( localIdx, 3, "" );
+   return rowIdx + localIdx - 1;
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+getValue( const IndexType localIdx ) const -> const RealType&
+{
+   return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ];
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+getValue( const IndexType localIdx ) -> RealType&
+{
+   return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ];
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+void 
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+setElement( const IndexType localIdx,
+            const RealType& value )
+{
+   this->values[ indexer.getGlobalIndex( rowIdx, localIdx ) ] = value;
+}
+
+} // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h
index 3f57fe1c3..05f7663c9 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.h
+++ b/src/TNL/Matrices/TridiagonalMatrixView.h
@@ -1,8 +1,8 @@
 /***************************************************************************
-                          Tridiagonal.h  -  description
+                          TridiagonalMatrixView.h  -  description
                              -------------------
-    begin                : Nov 30, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
+    begin                : Jan 9, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
@@ -10,200 +10,163 @@
 
 #pragma once
 
-#include <TNL/Matrices/Matrix.h>
+#include <TNL/Matrices/MatrixView.h>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Matrices/TridiagonalRow.h>
+#include <TNL/Matrices/TridiagonalMatrixRowView.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Matrices/details/TridiagonalMatrixIndexer.h>
 
 namespace TNL {
-namespace Matrices {   
-
-template< typename Device >
-class TridiagonalDeviceDependentCode;
+namespace Matrices {
 
 template< typename Real = double,
           typename Device = Devices::Host,
-          typename Index = int >
-class Tridiagonal : public Matrix< Real, Device, Index >
+          typename Index = int,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value >
+class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
 {
-private:
-   // convenient template alias for controlling the selection of copy-assignment operator
-   template< typename Device2 >
-   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
-
-   // friend class will be needed for templated assignment operators
-   template< typename Real2, typename Device2, typename Index2 >
-   friend class Tridiagonal;
-
-public:
-   typedef Real RealType;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-   typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector;
-   typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
-   typedef Matrix< Real, Device, Index > BaseType;
-   typedef TridiagonalRow< Real, Index > MatrixRow;
-
-   template< typename _Real = Real,
-             typename _Device = Device,
-             typename _Index = Index >
-   using Self = Tridiagonal< _Real, _Device, _Index >;
-
-   Tridiagonal();
-
-   static String getSerializationType();
-
-   virtual String getSerializationTypeVirtual() const;
+   public:
+      using RealType = Real;
+      using DeviceType = Device;
+      using IndexType = Index;
+      using BaseType = MatrixView< Real, Device, Index >;
+      using IndexerType = details::TridiagonalMatrixIndexer< IndexType, RowMajorOrder >;
+      using ValuesViewType = typename BaseType::ValuesView;
+      using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >;
+      using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
+      using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >;
 
-   void setDimensions( const IndexType rows,
-                       const IndexType columns );
+      // TODO: remove this - it is here only for compatibility with original matrix implementation
+      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
+      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
+      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
 
-   void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
+      template< typename _Real = Real,
+                typename _Device = Device,
+                typename _Index = Index,
+                bool RowMajorOrder_ = std::is_same< Device, Devices::Host >::value >
+      using Self = TridiagonalMatrixView< _Real, _Device, _Index, RowMajorOrder_ >;
 
-   IndexType getRowLength( const IndexType row ) const;
+      TridiagonalMatrixView();
 
-   __cuda_callable__
-   IndexType getRowLengthFast( const IndexType row ) const;
+      TridiagonalMatrixView( const ValuesViewType& values, const IndexerType& indexer );
 
-   IndexType getMaxRowLength() const;
+      ViewType getView();
 
-   template< typename Real2, typename Device2, typename Index2 >
-   void setLike( const Tridiagonal< Real2, Device2, Index2 >& m );
+      ConstViewType getConstView() const;
 
-   IndexType getNumberOfMatrixElements() const;
+      static String getSerializationType();
 
-   IndexType getNumberOfNonzeroMatrixElements() const;
+      virtual String getSerializationTypeVirtual() const;
 
-   IndexType getMaxRowlength() const;
+      void setDimensions( const IndexType rows,
+                          const IndexType columns );
 
-   void reset();
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
 
-   template< typename Real2, typename Device2, typename Index2 >
-   bool operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const;
+      [[deprecated]]
+      IndexType getRowLength( const IndexType row ) const;
 
-   template< typename Real2, typename Device2, typename Index2 >
-   bool operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const;
+      IndexType getMaxRowLength() const;
 
-   void setValue( const RealType& v );
+      IndexType getNumberOfMatrixElements() const;
 
-   __cuda_callable__
-   bool setElementFast( const IndexType row,
-                        const IndexType column,
-                        const RealType& value );
+      IndexType getNumberOfNonzeroMatrixElements() const;
 
-   bool setElement( const IndexType row,
-                    const IndexType column,
-                    const RealType& value );
+      IndexType getMaxRowlength() const;
 
-   __cuda_callable__
-   bool addElementFast( const IndexType row,
-                        const IndexType column,
-                        const RealType& value,
-                        const RealType& thisElementMultiplicator = 1.0 );
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      bool operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;
 
-   bool addElement( const IndexType row,
-                    const IndexType column,
-                    const RealType& value,
-                    const RealType& thisElementMultiplicator = 1.0 );
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      bool operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;
 
-   __cuda_callable__
-   bool setRowFast( const IndexType row,
-                    const IndexType* columns,
-                    const RealType* values,
-                    const IndexType elements );
+      RowView getRow( const IndexType& rowIdx );
 
-   bool setRow( const IndexType row,
-                const IndexType* columns,
-                const RealType* values,
-                const IndexType elements );
+      const RowView getRow( const IndexType& rowIdx ) const;
 
-   __cuda_callable__
-   bool addRowFast( const IndexType row,
-                    const IndexType* columns,
-                    const RealType* values,
-                    const IndexType elements,
-                    const RealType& thisRowMultiplicator = 1.0 );
+      void setValue( const RealType& v );
 
-   bool addRow( const IndexType row,
-                const IndexType* columns,
-                const RealType* values,
-                const IndexType elements,
-                const RealType& thisRowMultiplicator = 1.0 );
+      bool setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
 
-   __cuda_callable__
-   RealType getElementFast( const IndexType row,
-                            const IndexType column ) const;
+      bool addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator = 1.0 );
 
-   RealType getElement( const IndexType row,
-                        const IndexType column ) const;
+      RealType getElement( const IndexType row,
+                           const IndexType column ) const;
 
-   __cuda_callable__
-   void getRowFast( const IndexType row,
-                    IndexType* columns,
-                    RealType* values ) const;
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
-   __cuda_callable__
-   MatrixRow getRow( const IndexType rowIndex );
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
-   __cuda_callable__
-   const MatrixRow getRow( const IndexType rowIndex ) const;
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
 
-   template< typename Vector >
-   __cuda_callable__
-   typename Vector::RealType rowVectorProduct( const IndexType row,
-                                               const Vector& vector ) const;
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
 
-   template< typename InVector,
-             typename OutVector >
-   void vectorProduct( const InVector& inVector,
-                       OutVector& outVector ) const;
+      template< typename Function >
+      void forAllRows( Function& function ) const;
 
-   template< typename Real2, typename Index2 >
-   void addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix,
-                   const RealType& matrixMultiplicator = 1.0,
-                   const RealType& thisMatrixMultiplicator = 1.0 );
+      template< typename Function >
+      void forAllRows( Function& function );
 
-   template< typename Real2, typename Index2 >
-   void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
-                          const RealType& matrixMultiplicator = 1.0 );
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
 
-   template< typename Vector1, typename Vector2 >
-   __cuda_callable__
-   void performSORIteration( const Vector1& b,
-                             const IndexType row,
-                             Vector2& x,
-                             const RealType& omega = 1.0 ) const;
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector ) const;
 
-   // copy assignment
-   Tridiagonal& operator=( const Tridiagonal& matrix );
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      void addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix,
+                      const RealType& matrixMultiplicator = 1.0,
+                      const RealType& thisMatrixMultiplicator = 1.0 );
 
-   // cross-device copy assignment
-   template< typename Real2, typename Device2, typename Index2,
-             typename = typename Enabler< Device2 >::type >
-   Tridiagonal& operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix );
+      template< typename Real2, typename Index2 >
+      void getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix,
+                             const RealType& matrixMultiplicator = 1.0 );
 
-   void save( File& file ) const;
+      template< typename Vector1, typename Vector2 >
+      __cuda_callable__
+      void performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
 
-   void load( File& file );
+      // copy assignment
+      TridiagonalMatrixView& operator=( const TridiagonalMatrixView& matrix );
 
-   void save( const String& fileName ) const;
+      // cross-device copy assignment
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      TridiagonalMatrixView& operator=( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix );
 
-   void load( const String& fileName );
+      void save( File& file ) const;
 
-   void print( std::ostream& str ) const;
+      void save( const String& fileName ) const;
 
-protected:
+      void print( std::ostream& str ) const;
 
-   __cuda_callable__
-   IndexType getElementIndex( const IndexType row,
-                              const IndexType column ) const;
+   protected:
 
-   Containers::Vector< RealType, DeviceType, IndexType > values;
+      __cuda_callable__
+      IndexType getElementIndex( const IndexType row,
+                                 const IndexType localIdx ) const;
 
-   typedef TridiagonalDeviceDependentCode< DeviceType > DeviceDependentCode;
-   friend class TridiagonalDeviceDependentCode< DeviceType >;
+      IndexerType indexer;
 };
 
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/Tridiagonal_impl.h>
+#include <TNL/Matrices/TridiagonalMatrixView.hpp>
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp
index 2752f6850..ef893295e 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp
@@ -1,8 +1,8 @@
 /***************************************************************************
-                          Tridiagonal_impl.h  -  description
+                          TridiagonalMatrixView.hpp  -  description
                              -------------------
-    begin                : Nov 30, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
+    begin                : Jan 9, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
@@ -11,109 +11,85 @@
 #pragma once
 
 #include <TNL/Assert.h>
-#include <TNL/Matrices/Tridiagonal.h>
+#include <TNL/Matrices/TridiagonalMatrixView.h>
 #include <TNL/Exceptions/NotImplementedError.h>
 
 namespace TNL {
-namespace Matrices {   
-
-template< typename Device >
-class TridiagonalDeviceDependentCode;
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Tridiagonal< Real, Device, Index >::Tridiagonal()
-{
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String Tridiagonal< Real, Device, Index >::getType()
-{
-   return String( "Matrices::Tridiagonal< " ) +
-          String( TNL::getType< Real >() ) +
-          String( ", " ) +
-          String( Device :: getDeviceType() ) +
-          String( ", " ) +
-          String( TNL::getType< Index >() ) +
-          String( " >" );
-}
+namespace Matrices {
 
 template< typename Real,
           typename Device,
-          typename Index >
-String Tridiagonal< Real, Device, Index >::getTypeVirtual() const
+          typename Index,
+          bool RowMajorOrder >
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+TridiagonalMatrixView()
 {
-   return this->getType();
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-String Tridiagonal< Real, Device, Index >::getSerializationType()
+          typename Index,
+          bool RowMajorOrder >
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+TridiagonalMatrixView( const ValuesViewType& values, const IndexerType& indexer )
+: MatrixView< Real, Device, Index >( indexer.getRows(), indexer.getColumns(), values ), indexer( indexer )
 {
-   return String( "Matrices::Tridiagonal< " ) +
-          getType< RealType >() + ", " +
-          getType< Device >() + ", " +
-          getType< IndexType >() + " >";
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-String Tridiagonal< Real, Device, Index >::getSerializationTypeVirtual() const
+          typename Index,
+          bool RowMajorOrder >
+auto
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getView() -> ViewType
 {
-   return this->getSerializationType();
+   return ViewType( this->values.getView(), indexer );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::setDimensions( const IndexType rows,
-                                                        const IndexType columns )
+          typename Index,
+          bool RowMajorOrder >
+auto
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getConstView() const -> ConstViewType
 {
-   Matrix< Real, Device, Index >::setDimensions( rows, columns );
-   values.setSize( 3*min( rows, columns ) );
-   this->values.setValue( 0.0 );
+   return ConstViewType( this->values.getConstView(), indexer );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
+          typename Index,
+          bool RowMajorOrder >
+String
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getSerializationType()
 {
-   if( rowLengths[ 0 ] > 2 )
-      throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
-   const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
-   for( Index i = 1; i < diagonalLength-1; i++ )
-      if( rowLengths[ i ] > 3 )
-         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
-   if( this->getRows() > this->getColumns() )
-      if( rowLengths[ this->getRows()-1 ] > 1 )
-         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
-   if( this->getRows() == this->getColumns() )
-      if( rowLengths[ this->getRows()-1 ] > 2 )
-         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
-   if( this->getRows() < this->getColumns() )
-      if( rowLengths[ this->getRows()-1 ] > 3 )
-         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   return String( "Matrices::Tridiagonal< " ) +
+          TNL::getSerializationType< RealType >() + ", [any_device], " +
+          TNL::getSerializationType< IndexType >() + ", " +
+          ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >";
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Tridiagonal< Real, Device, Index >::getRowLength( const IndexType row ) const
+          typename Index,
+          bool RowMajorOrder >
+String
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getSerializationTypeVirtual() const
 {
-   return this->getRowLengthFast( row );
+   return this->getSerializationType();
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
+          typename Index,
+          bool RowMajorOrder >
+Index
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getRowLength( const IndexType row ) const
 {
    const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
    if( row == 0 )
@@ -129,46 +105,47 @@ Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Tridiagonal< Real, Device, Index >::getMaxRowLength() const
+          typename Index,
+          bool RowMajorOrder >
+Index
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getMaxRowLength() const
 {
    return 3;
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-   template< typename Real2, typename Device2, typename Index2 >
-void Tridiagonal< Real, Device, Index >::setLike( const Tridiagonal< Real2, Device2, Index2 >& m )
-{
-   this->setDimensions( m.getRows(), m.getColumns() );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Index Tridiagonal< Real, Device, Index >::getNumberOfMatrixElements() const
+          typename Index,
+          bool RowMajorOrder >
+Index
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getNumberOfMatrixElements() const
 {
    return 3 * min( this->getRows(), this->getColumns() );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Tridiagonal< Real, Device, Index > :: getNumberOfNonzeroMatrixElements() const
+          typename Index,
+          bool RowMajorOrder >
+Index
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getNumberOfNonzeroMatrixElements() const
 {
-   IndexType nonzeroElements = 0;
-   for( IndexType i = 0; i < this->values.getSize(); i++ )
-      if( this->values.getElement( i ) != 0 )
-         nonzeroElements++;
-   return nonzeroElements;
+   const auto values_view = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+      return ( values_view[ i ] != 0.0 );
+   };
+   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder >
 Index
-Tridiagonal< Real, Device, Index >::
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 getMaxRowlength() const
 {
    return 3;
@@ -176,84 +153,103 @@ getMaxRowlength() const
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::reset()
-{
-   Matrix< Real, Device, Index >::reset();
-   this->values.reset();
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+bool
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const
+{
+   if( RowMajorOrder == RowMajorOrder_ )
+      return this->values == matrix.values;
+   else
+   {
+      TNL_ASSERT( false, "TODO" );
+   }
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-   template< typename Real2, typename Device2, typename Index2 >
-bool Tridiagonal< Real, Device, Index >::operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+bool
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const
 {
-   return this->values == matrix.values;
+   return ! this->operator==( matrix );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-   template< typename Real2, typename Device2, typename Index2 >
-bool Tridiagonal< Real, Device, Index >::operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const
+          typename Index,
+          bool RowMajorOrder >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+setValue( const RealType& v )
 {
-   return this->values != matrix.values;
+   this->values = v;
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::setValue( const RealType& v )
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getRow( const IndexType& rowIdx ) const -> const RowView
 {
-   this->values.setValue( v );
+   return RowView( rowIdx, this->values.getView(), this->indexer );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder >
 __cuda_callable__
-bool Tridiagonal< Real, Device, Index >::setElementFast( const IndexType row,
-                                                                  const IndexType column,
-                                                                  const RealType& value )
+auto
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getRow( const IndexType& rowIdx ) -> RowView
 {
-   this->values[ this->getElementIndex( row, column ) ] = value;
-   return true;
+   return RowView( rowIdx, this->values.getView(), this->indexer );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Tridiagonal< Real, Device, Index >::setElement( const IndexType row,
-                                                              const IndexType column,
-                                                              const RealType& value )
-{
+          typename Index,
+          bool RowMajorOrder >
+bool
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+setElement( const IndexType row, const IndexType column, const RealType& value )
+{
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+   if( abs( row - column ) > 1 )
+      throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." );
    this->values.setElement( this->getElementIndex( row, column ), value );
    return true;
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-bool Tridiagonal< Real, Device, Index >::addElementFast( const IndexType row,
-                                                                  const IndexType column,
-                                                                  const RealType& value,
-                                                                  const RealType& thisElementMultiplicator )
-{
-   const Index i = this->getElementIndex( row, column );
-   this->values[ i ] = thisElementMultiplicator*this->values[ i ] + value;
-   return true;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-bool Tridiagonal< Real, Device, Index >::addElement( const IndexType row,
-                                                              const IndexType column,
-                                                              const RealType& value,
-                                                              const RealType& thisElementMultiplicator )
-{
+          typename Index,
+          bool RowMajorOrder >
+bool
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+addElement( const IndexType row,
+            const IndexType column,
+            const RealType& value,
+            const RealType& thisElementMultiplicator )
+{
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+   if( abs( row - column ) > 1 )
+      throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." );
    const Index i = this->getElementIndex( row, column );
    this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value );
    return true;
@@ -261,180 +257,230 @@ bool Tridiagonal< Real, Device, Index >::addElement( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-bool Tridiagonal< Real, Device, Index >::setRowFast( const IndexType row,
-                                                              const IndexType* columns,
-                                                              const RealType* values,
-                                                              const IndexType elements )
-{
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   return this->addRowFast( row, columns, values, elements, 0.0 );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-bool Tridiagonal< Real, Device, Index >::setRow( const IndexType row,
-                                                          const IndexType* columns,
-                                                          const RealType* values,
-                                                          const IndexType elements )
+          typename Index,
+          bool RowMajorOrder >
+Real
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getElement( const IndexType row, const IndexType column ) const
 {
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   return this->addRow( row, columns, values, elements, 0.0 );
-}
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
 
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-bool Tridiagonal< Real, Device, Index >::addRowFast( const IndexType row,
-                                                              const IndexType* columns,
-                                                              const RealType* values,
-                                                              const IndexType elements,
-                                                              const RealType& thisRowMultiplicator )
-{
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   if( elements > 3 )
-      return false;
-   for( IndexType i = 0; i < elements; i++ )
-   {
-      const IndexType& column = columns[ i ];
-      if( column < row - 1 || column > row + 1 )
-         return false;
-      addElementFast( row, column, values[ i ], thisRowMultiplicator );
-   }
-   return true;
+   if( abs( column - row ) > 1 )
+      return 0.0;
+   return this->values.getElement( this->getElementIndex( row, column ) );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Tridiagonal< Real, Device, Index >::addRow( const IndexType row,
-                                                          const IndexType* columns,
-                                                          const RealType* values,
-                                                          const IndexType elements,
-                                                          const RealType& thisRowMultiplicator )
-{
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   if( elements > 3 )
-      return false;
-   for( IndexType i = 0; i < elements; i++ )
-   {
-      const IndexType column = columns[ i ];
-      if( column < row - 1 || column > row + 1 )
-         return false;
-      addElement( row, column, values[ i ], thisRowMultiplicator );
-   }
-   return true;
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   const auto values_view = this->values.getConstView();
+   const auto indexer_ = this->indexer;
+   const auto rows = this->getRows();
+   const auto columns = this->getColumns();
+   const auto size = this->size;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      //bool compute;
+      if( rowIdx == 0 )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
+         keep( 0, reduce( fetch( 0, 0, i_0, values_view[ i_0 ] ),
+                          fetch( 0, 1, i_1, values_view[ i_1 ] ) ) );
+         return;
+      }
+      if( rowIdx < size || columns > rows )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
+
+         keep( rowIdx, reduce( reduce( fetch( rowIdx, rowIdx - 1, i_0, values_view[ i_0 ] ),
+                                       fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ),
+                               fetch( rowIdx, rowIdx + 1, i_2, values_view[ i_2] ) ) );
+         return;
+      }
+      if( rows == columns )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         keep( rowIdx, reduce( fetch( rowIdx, rowIdx - 1, i_0, values_view[ i_0 ] ),
+                               fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ) );
+      }
+      else
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         keep( rowIdx, fetch( rowIdx, rowIdx, i_0, values_view[ i_0 ] ) );
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-Real Tridiagonal< Real, Device, Index >::getElementFast( const IndexType row,
-                                                                  const IndexType column ) const
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   if( abs( column - row ) > 1 )
-      return 0.0;
-   return this->values[ this->getElementIndex( row, column ) ];
+   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Real Tridiagonal< Real, Device, Index >::getElement( const IndexType row,
-                                                              const IndexType column ) const
-{
-   if( abs( column - row ) > 1 )
-      return 0.0;
-   return this->values.getElement( this->getElementIndex( row, column ) );
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forRows( IndexType first, IndexType last, Function& function ) const
+{
+   const auto values_view = this->values.getConstView();
+   const auto indexer_ = this->indexer;
+   const auto rows = this->getRows();
+   const auto columns = this->getColumns();
+   const auto size = this->size;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      //bool compute;
+      if( rowIdx == 0 )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
+         function( 0, 1, rowIdx,     values_view[ i_0 ] );
+         function( 0, 2, rowIdx + 1, values_view[ i_1 ] );
+         return;
+      }
+      if( rowIdx < size || columns > rows )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
+         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
+         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+         function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] );
+         return;
+      }
+      if( rows == columns )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
+         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+      }
+      else
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         function( rowIdx, 0, rowIdx, values_view[ i_0 ] );
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-void Tridiagonal< Real, Device, Index >::getRowFast( const IndexType row,
-                                                              IndexType* columns,
-                                                              RealType* values ) const
-{
-   IndexType elementPointer( 0 );
-   for( IndexType i = -1; i <= 1; i++ )
-   {
-      const IndexType column = row + 1;
-      if( column >= 0 && column < this->getColumns() )
+          typename Index,
+          bool RowMajorOrder >
+  template< typename Function >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forRows( IndexType first, IndexType last, Function& function )
+{
+   const auto values_view = this->values.getConstView();
+   const auto indexer_ = this->indexer;
+   const auto rows = this->getRows();
+   const auto columns = this->getColumns();
+   const auto size = this->size;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      //bool compute;
+      if( rowIdx == 0 )
       {
-         columns[ elementPointer ] = column;
-         values[ elementPointer ] = this->values[ this->getElementIndex( row, column ) ];
-         elementPointer++;
+         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
+         function( 0, 1, rowIdx,     values_view[ i_0 ] );
+         function( 0, 2, rowIdx + 1, values_view[ i_1 ] );
+         return;
       }
-   }
+      if( rowIdx < size || columns > rows )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
+         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
+         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+         function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] );
+         return;
+      }
+      if( rows == columns )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
+         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+      }
+      else
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         function( rowIdx, 0, rowIdx, values_view[ i_0 ] );
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-typename Tridiagonal< Real, Device, Index >::MatrixRow
-Tridiagonal< Real, Device, Index >::
-getRow( const IndexType rowIndex )
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forAllRows( Function& function ) const
 {
-   if( std::is_same< Device, Devices::Host >::value )
-      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ],
-                        rowIndex,
-                        this->getColumns(),
-                        1 );
-   if( std::is_same< Device, Devices::Cuda >::value )
-      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ],
-                        rowIndex,
-                        this->getColumns(),
-                        this->rows );
+   this->forRows( 0, this->getRows(), function );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-const typename Tridiagonal< Real, Device, Index >::MatrixRow
-Tridiagonal< Real, Device, Index >::
-getRow( const IndexType rowIndex ) const
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forAllRows( Function& function )
 {
-   throw Exceptions::NotImplementedError();
+   this->forRows( 0, this->getRows(), function );
 }
 
-
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder >
 template< typename Vector >
 __cuda_callable__
-typename Vector::RealType Tridiagonal< Real, Device, Index >::rowVectorProduct( const IndexType row,
-                                                                                         const Vector& vector ) const
+typename Vector::RealType 
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+rowVectorProduct( const IndexType row, const Vector& vector ) const
 {
-   return TridiagonalDeviceDependentCode< Device >::
-             rowVectorProduct( this->rows,
-                               this->values,
-                               row,
-                               vector );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder >
    template< typename InVector,
              typename OutVector >
-void Tridiagonal< Real, Device, Index >::vectorProduct( const InVector& inVector,
-                                                                 OutVector& outVector ) const
+void 
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+vectorProduct( const InVector& inVector, OutVector& outVector ) const
 {
    TNL_ASSERT( this->getColumns() == inVector.getSize(),
             std::cerr << "Matrix columns: " << this->getColumns() << std::endl
@@ -443,16 +489,19 @@ void Tridiagonal< Real, Device, Index >::vectorProduct( const InVector& inVector
                std::cerr << "Matrix rows: " << this->getRows() << std::endl
                     << "Vector size: " << outVector.getSize() << std::endl );
 
-   DeviceDependentCode::vectorProduct( *this, inVector, outVector );
+   //DeviceDependentCode::vectorProduct( *this, inVector, outVector );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-   template< typename Real2, typename Index2 >
-void Tridiagonal< Real, Device, Index >::addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix,
-                                                    const RealType& matrixMultiplicator,
-                                                    const RealType& thisMatrixMultiplicator )
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix,
+           const RealType& matrixMultiplicator,
+           const RealType& thisMatrixMultiplicator )
 {
    TNL_ASSERT( this->getRows() == matrix.getRows(),
             std::cerr << "This matrix columns: " << this->getColumns() << std::endl
@@ -494,10 +543,13 @@ __global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, De
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder >
    template< typename Real2, typename Index2 >
-void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
-                                                                    const RealType& matrixMultiplicator )
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix,
+                  const RealType& matrixMultiplicator )
 {
    TNL_ASSERT( this->getRows() == matrix.getRows(),
                std::cerr << "This matrix rows: " << this->getRows() << std::endl
@@ -541,13 +593,16 @@ void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Re
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder >
    template< typename Vector1, typename Vector2 >
 __cuda_callable__
-void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector1& b,
-                                                              const IndexType row,
-                                                              Vector2& x,
-                                                              const RealType& omega ) const
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+performSORIteration( const Vector1& b,
+                     const IndexType row,
+                     Vector2& x,
+                     const RealType& omega ) const
 {
    RealType sum( 0.0 );
    if( row > 0 )
@@ -561,9 +616,11 @@ void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector1& b,
 // copy assignment
 template< typename Real,
           typename Device,
-          typename Index >
-Tridiagonal< Real, Device, Index >&
-Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal& matrix )
+          typename Index,
+          bool RowMajorOrder >
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >&
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+operator=( const TridiagonalMatrixView& matrix )
 {
    this->setLike( matrix );
    this->values = matrix.values;
@@ -573,14 +630,16 @@ Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal& matrix )
 // cross-device copy assignment
 template< typename Real,
           typename Device,
-          typename Index >
-   template< typename Real2, typename Device2, typename Index2, typename >
-Tridiagonal< Real, Device, Index >&
-Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix )
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >&
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+operator=( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix )
 {
    static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
                   "unknown device" );
-   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
+   static_assert( std::is_same< Device_, Devices::Host >::value || std::is_same< Device_, Devices::Cuda >::value,
                   "unknown device" );
 
    this->setLike( matrix );
@@ -591,42 +650,29 @@ Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal< Real2, Device2
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::save( File& file ) const
-{
-   Matrix< Real, Device, Index >::save( file );
-   file << this->values;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::load( File& file )
+          typename Index,
+          bool RowMajorOrder >
+void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const
 {
-   Matrix< Real, Device, Index >::load( file );
-   file >> this->values;
+   MatrixView< Real, Device, Index >::save( file );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::save( const String& fileName ) const
+          typename Index,
+          bool RowMajorOrder >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+save( const String& fileName ) const
 {
    Object::save( fileName );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::load( const String& fileName )
-{
-   Object::load( fileName );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::print( std::ostream& str ) const
+          typename Index,
+          bool RowMajorOrder >
+void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const
 {
    for( IndexType row = 0; row < this->getRows(); row++ )
    {
@@ -640,120 +686,18 @@ void Tridiagonal< Real, Device, Index >::print( std::ostream& str ) const
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder >
 __cuda_callable__
-Index Tridiagonal< Real, Device, Index >::getElementIndex( const IndexType row,
-                                                                    const IndexType column ) const
+Index TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getElementIndex( const IndexType row, const IndexType localIdx ) const
 {
-   TNL_ASSERT( row >= 0 && column >= 0 && row < this->rows && column < this->rows,
-              std::cerr << " this->rows = " << this->rows
-                   << " row = " << row << " column = " << column );
-   TNL_ASSERT( abs( row - column ) < 2,
-              std::cerr << "row = " << row << " column = " << column << std::endl );
-   return TridiagonalDeviceDependentCode< Device >::getElementIndex( this->rows, row, column );
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( localIdx, 0, "" );
+   TNL_ASSERT_LT( localIdx, 3, "" );
+   return this->indexer.getGlobalIndex( row, localIdx );
 }
 
-template<>
-class TridiagonalDeviceDependentCode< Devices::Host >
-{
-   public:
-
-      typedef Devices::Host Device;
-
-      template< typename Index >
-      __cuda_callable__
-      static Index getElementIndex( const Index rows,
-                                    const Index row,
-                                    const Index column )
-      {
-         return 2*row + column;
-      }
-
-      template< typename Vector,
-                typename Index,
-                typename ValuesType  >
-      __cuda_callable__
-      static typename Vector::RealType rowVectorProduct( const Index rows,
-                                                         const ValuesType& values,
-                                                         const Index row,
-                                                         const Vector& vector )
-      {
-         if( row == 0 )
-            return vector[ 0 ] * values[ 0 ] +
-                   vector[ 1 ] * values[ 1 ];
-         Index i = 3 * row;
-         if( row == rows - 1 )
-            return vector[ row - 1 ] * values[ i - 1 ] +
-                   vector[ row ] * values[ i ];
-         return vector[ row - 1 ] * values[ i - 1 ] +
-                vector[ row ] * values[ i ] +
-                vector[ row + 1 ] * values[ i + 1 ];
-      }
-
-      template< typename Real,
-                typename Index,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-#ifdef HAVE_OPENMP
-#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
-#endif
-         for( Index row = 0; row < matrix.getRows(); row ++ )
-            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
-      }
-};
-
-template<>
-class TridiagonalDeviceDependentCode< Devices::Cuda >
-{
-   public:
- 
-      typedef Devices::Cuda Device;
-
-      template< typename Index >
-      __cuda_callable__
-      static Index getElementIndex( const Index rows,
-                                    const Index row,
-                                    const Index column )
-      {
-         return ( column - row + 1 )*rows + row - 1;
-      }
-
-      template< typename Vector,
-                typename Index,
-                typename ValuesType >
-      __cuda_callable__
-      static typename Vector::RealType rowVectorProduct( const Index rows,
-                                                         const ValuesType& values,
-                                                         const Index row,
-                                                         const Vector& vector )
-      {
-         if( row == 0 )
-            return vector[ 0 ] * values[ 0 ] +
-                   vector[ 1 ] * values[ rows - 1 ];
-         Index i = row - 1;
-         if( row == rows - 1 )
-            return vector[ row - 1 ] * values[ i ] +
-                   vector[ row ] * values[ i + rows ];
-         return vector[ row - 1 ] * values[ i ] +
-                vector[ row ] * values[ i + rows ] +
-                vector[ row + 1 ] * values[ i + 2*rows ];
-      }
-
-      template< typename Real,
-                typename Index,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-         MatrixVectorProductCuda( matrix, inVector, outVector );
-      }
-};
-
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/TridiagonalRow.h b/src/TNL/Matrices/TridiagonalRow.h
deleted file mode 100644
index 9d06b39e1..000000000
--- a/src/TNL/Matrices/TridiagonalRow.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/***************************************************************************
-                          TridiagonalRow.h  -  description
-                             -------------------
-    begin                : Dec 31, 2014
-    copyright            : (C) 2014 by oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-namespace TNL {
-namespace Matrices {   
-
-template< typename Real, typename Index >
-class TridiagonalRow
-{
-   public:
-
-      __cuda_callable__
-      TridiagonalRow();
-
-      __cuda_callable__
-      TridiagonalRow( Real* values,
-                               const Index row,
-                               const Index columns,
-                               const Index step );
-
-      __cuda_callable__
-      void bind( Real* values,
-                 const Index row,
-                 const Index columns,
-                 const Index step );
-
-      __cuda_callable__
-      void setElement( const Index& elementIndex,
-                       const Index& column,
-                       const Real& value );
-
-   protected:
-
-      Real* values;
-
-      Index row, columns, step;
-};
-
-} // namespace Matrices
-} // namespace TNL
-
-#include <TNL/Matrices/TridiagonalRow_impl.h>
diff --git a/src/TNL/Matrices/TridiagonalRow_impl.h b/src/TNL/Matrices/TridiagonalRow_impl.h
deleted file mode 100644
index f5b7e842a..000000000
--- a/src/TNL/Matrices/TridiagonalRow_impl.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/***************************************************************************
-                          TridiagonalRow_impl.h  -  description
-                             -------------------
-    begin                : Dec 31, 2014
-    copyright            : (C) 2014 by oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-namespace TNL {
-namespace Matrices {   
-
-template< typename Real, typename Index >
-__cuda_callable__
-TridiagonalRow< Real, Index >::
-TridiagonalRow()
-: values( 0 ),
-  row( 0 ),
-  columns( 0 ),
-  step( 0 )
-{
-}
-
-template< typename Real, typename Index >
-__cuda_callable__
-TridiagonalRow< Real, Index >::
-TridiagonalRow( Real* values,
-                         const Index row,
-                         const Index columns,
-                         const Index step )
-: values( values ),
-  row( row ),
-  columns( columns ),
-  step( step )
-{
-}
-
-template< typename Real, typename Index >
-__cuda_callable__
-void
-TridiagonalRow< Real, Index >::
-bind( Real* values,
-      const Index row,
-      const Index columns,
-      const Index step )
-{
-   this->values = values;
-   this->row = row;
-   this->columns = columns;
-   this->step = step;
-}
-
-template< typename Real, typename Index >
-__cuda_callable__
-void
-TridiagonalRow< Real, Index >::
-setElement( const Index& elementIndex,
-            const Index& column,
-            const Real& value )
-{
-   TNL_ASSERT( this->values, );
-   TNL_ASSERT( this->step > 0,);
-   TNL_ASSERT( column >= 0 && column < this->columns,
-              std::cerr << "column = " << columns << " this->columns = " << this->columns );
-   TNL_ASSERT( abs( column - row ) <= 1,
-              std::cerr << "column = " << column << " row =  " << row );
-
-   /****
-    * this->values stores an adress of the diagonal element
-    */
-   this->values[ ( column - row ) * this->step ] = value;
-}
-
-} // namespace Matrices
-} // namespace TNL
diff --git a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
new file mode 100644
index 000000000..2f245c38f
--- /dev/null
+++ b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
@@ -0,0 +1,90 @@
+/***************************************************************************
+                          TridiagonalMatrixIndexer.h  -  description
+                             -------------------
+    begin                : Jan 9, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Matrices {
+      namespace details {
+
+template< typename Index,
+          bool RowMajorOrder >
+class TridiagonalMatrixIndexer
+{
+   public:
+
+      using IndexType = Index;
+
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; };
+
+      __cuda_callable__
+      TridiagonalMatrixIndexer()
+      : rows( 0 ), columns( 0 ), size( 0 ){};
+
+      __cuda_callable__
+      TridiagonalMatrixIndexer( const IndexType& rows, const IndexType& columns )
+      : rows( rows ), columns( columns ), size( TNL::min( rows, columns ) ) {};
+
+      __cuda_callable__
+      TridiagonalMatrixIndexer( const TridiagonalMatrixIndexer& indexer )
+      : rows( indexer.rows ), columns( indexer.columns ), size( indexer.size ) {};
+
+      void setDimensions( const IndexType& rows, const IndexType& columns )
+      {
+         this->rows = rows;
+         this->columns = columns;
+         this->size = min( rows, columns );
+      };
+
+      __cuda_callable__
+      IndexType getRowSize( const IndexType rowIdx ) const
+      {
+         if( rowIdx == 0 )
+            return 2;
+         if( columns <= rows )
+         {
+            if( rowIdx == columns - 1 )
+               return 2;
+            if( rowIdx == columns )
+               return 1;
+         }
+         return 3;
+      };
+
+      __cuda_callable__
+      IndexType getRows() const { return this->rows; };
+
+      __cuda_callable__
+      IndexType getColumns() const { return this->rows; };
+
+      __cuda_callable__
+      IndexType getStorageSize() const { return 3 * this->size; };
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index rowIdx, const Index localIdx ) const
+      {
+         TNL_ASSERT_GE( localIdx, 0, "" );
+         TNL_ASSERT_LT( localIdx, 3, "" );
+         TNL_ASSERT_GE( rowIdx, 0, "" );
+         TNL_ASSERT_LT( rowIdx, this->rows, "" );
+         
+         if( RowMajorOrder )
+            return 3 * rowIdx + localIdx;
+         else
+            return localIdx * size + rowIdx;
+      };
+
+      protected:
+
+         IndexType rows, columns, size;
+};
+      } //namespace details
+   } // namespace Materices
+} // namespace TNL
diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
index 40cecb2bd..962f8c82d 100644
--- a/src/UnitTests/Matrices/TridiagonalMatrixTest.h
+++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
@@ -587,7 +587,7 @@ void test_SetRow()
          { 2, 3, 4, 5, 6 } };
       auto row = matrix_view.getRow( rowIdx );
       for( IndexType i = 0; i < 5; i++ )
-        row.setElement( columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] );
+        row.setElement( i, values[ rowIdx ][ i ] );
    };
    TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f );
 
@@ -1172,7 +1172,7 @@ void test_AssignmentOperator()
    TridiagonalHost hostMatrix( rows, columns );
    for( IndexType i = 0; i < columns; i++ )
       for( IndexType j = 0; j <= i; j++ )
-         hostMatrix( i, j ) = i + j;
+         hostMatrix.setElement( i, j,  i + j );
 
    Matrix matrix( rows, columns );
    matrix.getValues() = 0.0;
@@ -1369,7 +1369,7 @@ using MatrixTypes = ::testing::Types
 
 TYPED_TEST_SUITE( MatrixTest, MatrixTypes );
 
-TYPED_TEST( Matrix, getSerializationType )
+TYPED_TEST( MatrixTest, getSerializationType )
 {
    test_GetSerializationType();
 }
-- 
GitLab


From 57de3baa0541918c960db0c64d99f4738fbfd822 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 9 Jan 2020 22:12:32 +0100
Subject: [PATCH 080/179] Adding mutlidiagonal matrix unit tests.

---
 src/UnitTests/Matrices/CMakeLists.txt         |    8 +
 .../Matrices/MultidiagonalMatrixTest.cpp      |   11 +
 .../Matrices/MultidiagonalMatrixTest.cu       |   11 +
 .../Matrices/MultidiagonalMatrixTest.h        | 1611 +++++++++++++++++
 4 files changed, 1641 insertions(+)
 create mode 100644 src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp
 create mode 100644 src/UnitTests/Matrices/MultidiagonalMatrixTest.cu
 create mode 100644 src/UnitTests/Matrices/MultidiagonalMatrixTest.h

diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index 333dee952..287495405 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -13,6 +13,9 @@ IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
+   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
+
    CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} )
 
@@ -39,6 +42,10 @@ ELSE(  BUILD_CUDA )
    TARGET_COMPILE_OPTIONS( TridiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
+   ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cpp )
+   TARGET_COMPILE_OPTIONS( MultidiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
+
    ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cpp )
    TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR_segments PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} )
@@ -58,6 +65,7 @@ ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${C
 ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( TridiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 
 ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest_Ellpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} )
diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp
new file mode 100644
index 000000000..73406d0df
--- /dev/null
+++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          MultidiagonalMatrixTest.cpp -  description
+                             -------------------
+    begin                : Jan 9, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "MultidiagonalMatrixTest.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu
new file mode 100644
index 000000000..e3dab545c
--- /dev/null
+++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          MultidiagonalMatrixTest.cu -  description
+                             -------------------
+    begin                : Jan 9, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "MultidiagonalMatrixTest.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
new file mode 100644
index 000000000..01ae4a518
--- /dev/null
+++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
@@ -0,0 +1,1611 @@
+/***************************************************************************
+                          MultidiagonalMatrixTest.h -  description
+                             -------------------
+    begin                : Jan 9, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Devices/Host.h>
+#include <TNL/Matrices/Matrix.h>
+#include <TNL/Matrices/Multidiagonal.h>
+#include <TNL/Containers/Array.h>
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Math.h>
+#include <iostream>
+
+using Multidiagonal_host_float = TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int >;
+using Multidiagonal_host_int = TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int >;
+
+using Multidiagonal_cuda_float = TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int >;
+using Multidiagonal_cuda_int = TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int >;
+
+static const char* TEST_FILE_NAME = "test_MultidiagonalMatrixTest.tnl";
+
+#ifdef HAVE_GTEST
+#include <type_traits>
+
+#include <gtest/gtest.h>
+
+void test_GetSerializationType()
+{
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator] >" ) );
+}
+
+template< typename Matrix >
+void test_SetDimensions()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+    const IndexType rows = 9;
+    const IndexType cols = 8;
+
+    Matrix m;
+    m.setDimensions( rows, cols );
+
+    EXPECT_EQ( m.getRows(), 9 );
+    EXPECT_EQ( m.getColumns(), 8 );
+}
+
+template< typename Matrix1, typename Matrix2 >
+void test_SetLike()
+{
+    using RealType = typename Matrix1::RealType;
+    using DeviceType = typename Matrix1::DeviceType;
+    using IndexType = typename Matrix1::IndexType;
+
+    const IndexType rows = 8;
+    const IndexType cols = 7;
+
+    Matrix1 m1;
+    m1.reset();
+    m1.setDimensions( rows + 1, cols + 2 );
+
+    Matrix2 m2;
+    m2.reset();
+    m2.setDimensions( rows, cols );
+
+    m1.setLike( m2 );
+
+    EXPECT_EQ( m1.getRows(), m2.getRows() );
+    EXPECT_EQ( m1.getColumns(), m2.getColumns() );
+}
+
+template< typename Matrix >
+void test_GetCompressedRowLengths()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   const IndexType rows = 10;
+   const IndexType cols = 11;
+
+    Matrix m( rows, cols );
+
+    // Insert values into the rows.
+    RealType value = 1;
+
+    for( IndexType i = 0; i < 3; i++ )      // 0th row
+        m.setElement( 0, i, value++ );
+
+    for( IndexType i = 0; i < 3; i++ )      // 1st row
+        m.setElement( 1, i, value++ );
+
+    for( IndexType i = 0; i < 1; i++ )      // 2nd row
+        m.setElement( 2, i, value++ );
+
+    for( IndexType i = 0; i < 2; i++ )      // 3rd row
+        m.setElement( 3, i, value++ );
+
+    for( IndexType i = 0; i < 3; i++ )      // 4th row
+        m.setElement( 4, i, value++ );
+
+    for( IndexType i = 0; i < 4; i++ )      // 5th row
+        m.setElement( 5, i, value++ );
+
+    for( IndexType i = 0; i < 5; i++ )      // 6th row
+        m.setElement( 6, i, value++ );
+
+    for( IndexType i = 0; i < 6; i++ )      // 7th row
+        m.setElement( 7, i, value++ );
+
+    for( IndexType i = 0; i < 7; i++ )      // 8th row
+        m.setElement( 8, i, value++ );
+
+    for( IndexType i = 0; i < 8; i++ )      // 9th row
+        m.setElement( 9, i, value++ );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths = 0;
+   m.getCompressedRowLengths( rowLengths );
+   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 };
+   EXPECT_EQ( rowLengths, correctRowLengths );
+}
+
+template< typename Matrix >
+void test_GetRowLength()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+    const IndexType rows = 8;
+    const IndexType cols = 7;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    EXPECT_EQ( m.getRowLength( 0 ), 7 );
+    EXPECT_EQ( m.getRowLength( 1 ), 7 );
+    EXPECT_EQ( m.getRowLength( 2 ), 7 );
+    EXPECT_EQ( m.getRowLength( 3 ), 7 );
+    EXPECT_EQ( m.getRowLength( 4 ), 7 );
+    EXPECT_EQ( m.getRowLength( 5 ), 7 );
+    EXPECT_EQ( m.getRowLength( 6 ), 7 );
+    EXPECT_EQ( m.getRowLength( 7 ), 7 );
+}
+
+template< typename Matrix >
+void test_GetNumberOfMatrixElements()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+    const IndexType rows = 7;
+    const IndexType cols = 6;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    EXPECT_EQ( m.getNumberOfMatrixElements(), 42 );
+}
+
+template< typename Matrix >
+void test_GetNumberOfNonzeroMatrixElements()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+/*
+ * Sets up the following 7x6 dense matrix:
+ *
+ *    /  0  2  3  4  5  6 \
+ *    |  7  8  9 10 11 12 |
+ *    | 13 14 15 16 17 18 |
+ *    | 19 20 21 22 23 24 |
+ *    | 25 26 27 28 29 30 |
+ *    | 31 32 33 34 35 36 |
+ *    \ 37 38 39 40 41  0 /
+ */
+    const IndexType rows = 7;
+    const IndexType cols = 6;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++ )
+            m.setElement( i, j, value++ );
+
+    m.setElement( 0, 0, 0); // Set the first element of the diagonal to 0.
+    m.setElement( 6, 5, 0); // Set the last element of the diagonal to 0.
+
+    EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 40 );
+}
+
+template< typename Matrix >
+void test_Reset()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+/*
+ * Sets up the following 5x4 dense matrix:
+ *
+ *    /  0  0  0  0 \
+ *    |  0  0  0  0 |
+ *    |  0  0  0  0 |
+ *    |  0  0  0  0 |
+ *    \  0  0  0  0 /
+ */
+    const IndexType rows = 5;
+    const IndexType cols = 4;
+
+    Matrix m;
+    m.setDimensions( rows, cols );
+
+    m.reset();
+
+    EXPECT_EQ( m.getRows(), 0 );
+    EXPECT_EQ( m.getColumns(), 0 );
+}
+
+template< typename Matrix >
+void test_SetValue()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 7x6 dense matrix:
+ *
+ *    /  1  2  3  4  5  6 \
+ *    |  7  8  9 10 11 12 |
+ *    | 13 14 15 16 17 18 |
+ *    | 19 20 21 22 23 24 |
+ *    | 25 26 27 28 29 30 |
+ *    | 31 32 33 34 35 36 |
+ *    \ 37 38 39 40 41 42 /
+ */
+    const IndexType rows = 7;
+    const IndexType cols = 6;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++ )
+            m.setElement( i, j, value++ );
+
+    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  3 );
+    EXPECT_EQ( m.getElement( 0, 3 ),  4 );
+    EXPECT_EQ( m.getElement( 0, 4 ),  5 );
+    EXPECT_EQ( m.getElement( 0, 5 ),  6 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ),  7 );
+    EXPECT_EQ( m.getElement( 1, 1 ),  8 );
+    EXPECT_EQ( m.getElement( 1, 2 ),  9 );
+    EXPECT_EQ( m.getElement( 1, 3 ), 10 );
+    EXPECT_EQ( m.getElement( 1, 4 ), 11 );
+    EXPECT_EQ( m.getElement( 1, 5 ), 12 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ), 13 );
+    EXPECT_EQ( m.getElement( 2, 1 ), 14 );
+    EXPECT_EQ( m.getElement( 2, 2 ), 15 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 16 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 17 );
+    EXPECT_EQ( m.getElement( 2, 5 ), 18 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 19 );
+    EXPECT_EQ( m.getElement( 3, 1 ), 20 );
+    EXPECT_EQ( m.getElement( 3, 2 ), 21 );
+    EXPECT_EQ( m.getElement( 3, 3 ), 22 );
+    EXPECT_EQ( m.getElement( 3, 4 ), 23 );
+    EXPECT_EQ( m.getElement( 3, 5 ), 24 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 25 );
+    EXPECT_EQ( m.getElement( 4, 1 ), 26 );
+    EXPECT_EQ( m.getElement( 4, 2 ), 27 );
+    EXPECT_EQ( m.getElement( 4, 3 ), 28 );
+    EXPECT_EQ( m.getElement( 4, 4 ), 29 );
+    EXPECT_EQ( m.getElement( 4, 5 ), 30 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ), 31 );
+    EXPECT_EQ( m.getElement( 5, 1 ), 32 );
+    EXPECT_EQ( m.getElement( 5, 2 ), 33 );
+    EXPECT_EQ( m.getElement( 5, 3 ), 34 );
+    EXPECT_EQ( m.getElement( 5, 4 ), 35 );
+    EXPECT_EQ( m.getElement( 5, 5 ), 36 );
+
+    EXPECT_EQ( m.getElement( 6, 0 ), 37 );
+    EXPECT_EQ( m.getElement( 6, 1 ), 38 );
+    EXPECT_EQ( m.getElement( 6, 2 ), 39 );
+    EXPECT_EQ( m.getElement( 6, 3 ), 40 );
+    EXPECT_EQ( m.getElement( 6, 4 ), 41 );
+    EXPECT_EQ( m.getElement( 6, 5 ), 42 );
+
+    // Set the values of all elements to a certain number
+    m.setValue( 42 );
+
+    EXPECT_EQ( m.getElement( 0, 0 ), 42 );
+    EXPECT_EQ( m.getElement( 0, 1 ), 42 );
+    EXPECT_EQ( m.getElement( 0, 2 ), 42 );
+    EXPECT_EQ( m.getElement( 0, 3 ), 42 );
+    EXPECT_EQ( m.getElement( 0, 4 ), 42 );
+    EXPECT_EQ( m.getElement( 0, 5 ), 42 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ), 42 );
+    EXPECT_EQ( m.getElement( 1, 1 ), 42 );
+    EXPECT_EQ( m.getElement( 1, 2 ), 42 );
+    EXPECT_EQ( m.getElement( 1, 3 ), 42 );
+    EXPECT_EQ( m.getElement( 1, 4 ), 42 );
+    EXPECT_EQ( m.getElement( 1, 5 ), 42 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ), 42 );
+    EXPECT_EQ( m.getElement( 2, 1 ), 42 );
+    EXPECT_EQ( m.getElement( 2, 2 ), 42 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 42 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 42 );
+    EXPECT_EQ( m.getElement( 2, 5 ), 42 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 42 );
+    EXPECT_EQ( m.getElement( 3, 1 ), 42 );
+    EXPECT_EQ( m.getElement( 3, 2 ), 42 );
+    EXPECT_EQ( m.getElement( 3, 3 ), 42 );
+    EXPECT_EQ( m.getElement( 3, 4 ), 42 );
+    EXPECT_EQ( m.getElement( 3, 5 ), 42 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 42 );
+    EXPECT_EQ( m.getElement( 4, 1 ), 42 );
+    EXPECT_EQ( m.getElement( 4, 2 ), 42 );
+    EXPECT_EQ( m.getElement( 4, 3 ), 42 );
+    EXPECT_EQ( m.getElement( 4, 4 ), 42 );
+    EXPECT_EQ( m.getElement( 4, 5 ), 42 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ), 42 );
+    EXPECT_EQ( m.getElement( 5, 1 ), 42 );
+    EXPECT_EQ( m.getElement( 5, 2 ), 42 );
+    EXPECT_EQ( m.getElement( 5, 3 ), 42 );
+    EXPECT_EQ( m.getElement( 5, 4 ), 42 );
+    EXPECT_EQ( m.getElement( 5, 5 ), 42 );
+
+    EXPECT_EQ( m.getElement( 6, 0 ), 42 );
+    EXPECT_EQ( m.getElement( 6, 1 ), 42 );
+    EXPECT_EQ( m.getElement( 6, 2 ), 42 );
+    EXPECT_EQ( m.getElement( 6, 3 ), 42 );
+    EXPECT_EQ( m.getElement( 6, 4 ), 42 );
+    EXPECT_EQ( m.getElement( 6, 5 ), 42 );
+}
+
+template< typename Matrix >
+void test_SetElement()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 5x5 dense matrix:
+ *
+ *    /  1  2  3  4  5 \
+ *    |  6  7  8  9 10 |
+ *    | 11 12 13 14 15 |
+ *    | 16 17 18 19 20 |
+ *    \ 21 22 23 24 25 /
+ */
+    const IndexType rows = 5;
+    const IndexType cols = 5;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++ )
+            m.setElement( i, j, value++ );
+
+    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  3 );
+    EXPECT_EQ( m.getElement( 0, 3 ),  4 );
+    EXPECT_EQ( m.getElement( 0, 4 ),  5 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+    EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+    EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+    EXPECT_EQ( m.getElement( 1, 3 ),  9 );
+    EXPECT_EQ( m.getElement( 1, 4 ), 10 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ), 11 );
+    EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+    EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 15 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+    EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+    EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+    EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+    EXPECT_EQ( m.getElement( 3, 4 ), 20 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 21 );
+    EXPECT_EQ( m.getElement( 4, 1 ), 22 );
+    EXPECT_EQ( m.getElement( 4, 2 ), 23 );
+    EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+    EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+}
+
+template< typename Matrix >
+void test_AddElement()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 6x5 dense matrix:
+ *
+ *    /  1  2  3  4  5 \
+ *    |  6  7  8  9 10 |
+ *    | 11 12 13 14 15 |
+ *    | 16 17 18 19 20 |
+ *    | 21 22 23 24 25 |
+ *    \ 26 27 28 29 30 /
+ */
+    const IndexType rows = 6;
+    const IndexType cols = 5;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++ )
+            m.setElement( i, j, value++ );
+
+    // Check the added elements
+    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  3 );
+    EXPECT_EQ( m.getElement( 0, 3 ),  4 );
+    EXPECT_EQ( m.getElement( 0, 4 ),  5 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+    EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+    EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+    EXPECT_EQ( m.getElement( 1, 3 ),  9 );
+    EXPECT_EQ( m.getElement( 1, 4 ), 10 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ), 11 );
+    EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+    EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 15 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+    EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+    EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+    EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+    EXPECT_EQ( m.getElement( 3, 4 ), 20 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 21 );
+    EXPECT_EQ( m.getElement( 4, 1 ), 22 );
+    EXPECT_EQ( m.getElement( 4, 2 ), 23 );
+    EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+    EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ), 26 );
+    EXPECT_EQ( m.getElement( 5, 1 ), 27 );
+    EXPECT_EQ( m.getElement( 5, 2 ), 28 );
+    EXPECT_EQ( m.getElement( 5, 3 ), 29 );
+    EXPECT_EQ( m.getElement( 5, 4 ), 30 );
+
+    // Add new elements to the old elements with a multiplying factor applied to the old elements.
+/*
+ * The following setup results in the following 6x5 dense matrix:
+ *
+ *    /  3  6  9 12 15 \
+ *    | 18 21 24 27 30 |
+ *    | 33 36 39 42 45 |
+ *    | 48 51 54 57 60 |
+ *    | 63 66 69 72 75 |
+ *    \ 78 81 84 87 90 /
+ */
+    RealType newValue = 1;
+    RealType multiplicator = 2;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++ )
+            m.addElement( i, j, newValue++, multiplicator );
+
+    EXPECT_EQ( m.getElement( 0, 0 ),  3 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  6 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  9 );
+    EXPECT_EQ( m.getElement( 0, 3 ), 12 );
+    EXPECT_EQ( m.getElement( 0, 4 ), 15 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ), 18 );
+    EXPECT_EQ( m.getElement( 1, 1 ), 21 );
+    EXPECT_EQ( m.getElement( 1, 2 ), 24 );
+    EXPECT_EQ( m.getElement( 1, 3 ), 27 );
+    EXPECT_EQ( m.getElement( 1, 4 ), 30 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ), 33 );
+    EXPECT_EQ( m.getElement( 2, 1 ), 36 );
+    EXPECT_EQ( m.getElement( 2, 2 ), 39 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 42 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 45 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 48 );
+    EXPECT_EQ( m.getElement( 3, 1 ), 51 );
+    EXPECT_EQ( m.getElement( 3, 2 ), 54 );
+    EXPECT_EQ( m.getElement( 3, 3 ), 57 );
+    EXPECT_EQ( m.getElement( 3, 4 ), 60 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 63 );
+    EXPECT_EQ( m.getElement( 4, 1 ), 66 );
+    EXPECT_EQ( m.getElement( 4, 2 ), 69 );
+    EXPECT_EQ( m.getElement( 4, 3 ), 72 );
+    EXPECT_EQ( m.getElement( 4, 4 ), 75 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ), 78 );
+    EXPECT_EQ( m.getElement( 5, 1 ), 81 );
+    EXPECT_EQ( m.getElement( 5, 2 ), 84 );
+    EXPECT_EQ( m.getElement( 5, 3 ), 87 );
+    EXPECT_EQ( m.getElement( 5, 4 ), 90 );
+}
+
+template< typename Matrix >
+void test_SetRow()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 3x7 dense matrix:
+    *
+    *    /  1  2  3  4  5  6  7 \
+    *    |  8  9 10 11 12 13 14 |
+    *    \ 15 16 17 18 19 20 21 /
+    */
+   const IndexType rows = 3;
+   const IndexType cols = 7;
+
+   Matrix m;
+   m.reset();
+   m.setDimensions( rows, cols );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+         m.setElement( i, j, value++ );
+
+   auto matrix_view = m.getView();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      RealType values[ 3 ][ 5 ] {
+         { 11, 11, 11, 11, 11 },
+         { 22, 22, 22, 22, 22 },
+         { 33, 33, 33, 33, 33 } };
+      IndexType columnIndexes[ 3 ][ 5 ] {
+         { 0, 1, 2, 3, 4 },
+         { 0, 1, 2, 3, 4 },
+         { 2, 3, 4, 5, 6 } };
+      auto row = matrix_view.getRow( rowIdx );
+      for( IndexType i = 0; i < 5; i++ )
+        row.setElement( i, values[ rowIdx ][ i ] );
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f );
+
+   EXPECT_EQ( m.getElement( 0, 0 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 2 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 3 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 4 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  6 );
+   EXPECT_EQ( m.getElement( 0, 6 ),  7 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 5 ), 13 );
+   EXPECT_EQ( m.getElement( 1, 6 ), 14 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 15 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 16 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 33 );
+}
+
+template< typename Matrix >
+void test_AddRow()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   /*
+    * Sets up the following 6x5 dense matrix:
+    *
+    *    /  1  2  3  4  5 \
+    *    |  6  7  8  9 10 |
+    *    | 11 12 13 14 15 |
+    *    | 16 17 18 19 20 |
+    *    | 21 22 23 24 25 |
+    *    \ 26 27 28 29 30 /
+    */
+
+   const IndexType rows = 6;
+   const IndexType cols = 5;
+
+   Matrix m( rows, cols );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+         m.setElement( i, j, value++ );
+
+   // Check the added elements
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  4 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  5 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  9 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 10 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 11 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 15 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 20 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 21 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 22 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 23 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 26 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 27 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 28 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 29 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 30 );
+
+   // Add new elements to the old elements with a multiplying factor applied to the old elements.
+   /*
+    * The following setup results in the following 6x5 sparse matrix:
+    *
+    *    /  3  6  9 12 15 \
+    *    | 18 21 24 27 30 |
+    *    | 33 36 39 42 45 |
+    *    | 48 51 54 57 60 |
+    *    | 63 66 69 72 75 |
+    *    \ 78 81 84 87 90 /
+    */
+
+   auto matrix_view = m.getView();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      RealType values[ 6 ][ 5 ] {
+         { 11, 11, 11, 11, 0 },
+         { 22, 22, 22, 22, 0 },
+         { 33, 33, 33, 33, 0 },
+         { 44, 44, 44, 44, 0 },
+         { 55, 55, 55, 55, 0 },
+         { 66, 66, 66, 66, 0 } };
+      auto row = matrix_view.getRow( rowIdx );
+      for( IndexType i = 0; i < 5; i++ )
+      {
+         RealType& val = row.getValue( i );
+         val = rowIdx * val + values[ rowIdx ][ i ];
+      }
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f );
+
+
+    EXPECT_EQ( m.getElement( 0, 0 ),  11 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  11 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  11 );
+    EXPECT_EQ( m.getElement( 0, 3 ),  11 );
+    EXPECT_EQ( m.getElement( 0, 4 ),   0 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ),  28 );
+    EXPECT_EQ( m.getElement( 1, 1 ),  29 );
+    EXPECT_EQ( m.getElement( 1, 2 ),  30 );
+    EXPECT_EQ( m.getElement( 1, 3 ),  31 );
+    EXPECT_EQ( m.getElement( 1, 4 ),  10 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ),  55 );
+    EXPECT_EQ( m.getElement( 2, 1 ),  57 );
+    EXPECT_EQ( m.getElement( 2, 2 ),  59 );
+    EXPECT_EQ( m.getElement( 2, 3 ),  61 );
+    EXPECT_EQ( m.getElement( 2, 4 ),  30 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ),  92 );
+    EXPECT_EQ( m.getElement( 3, 1 ),  95 );
+    EXPECT_EQ( m.getElement( 3, 2 ),  98 );
+    EXPECT_EQ( m.getElement( 3, 3 ), 101 );
+    EXPECT_EQ( m.getElement( 3, 4 ),  60 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 139 );
+    EXPECT_EQ( m.getElement( 4, 1 ), 143 );
+    EXPECT_EQ( m.getElement( 4, 2 ), 147 );
+    EXPECT_EQ( m.getElement( 4, 3 ), 151 );
+    EXPECT_EQ( m.getElement( 4, 4 ), 100 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ), 196 );
+    EXPECT_EQ( m.getElement( 5, 1 ), 201 );
+    EXPECT_EQ( m.getElement( 5, 2 ), 206 );
+    EXPECT_EQ( m.getElement( 5, 3 ), 211 );
+    EXPECT_EQ( m.getElement( 5, 4 ), 150 );
+}
+
+template< typename Matrix >
+void test_VectorProduct()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 5x4 dense matrix:
+ *
+ *    /  1  2  3  4 \
+ *    |  5  6  7  8 |
+ *    |  9 10 11 12 |
+ *    | 13 14 15 16 |
+ *    \ 17 18 19 20 /
+ */
+    const IndexType rows = 5;
+    const IndexType cols = 4;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++)
+            m.setElement( i, j, value++ );
+
+    using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
+
+    VectorType inVector;
+    inVector.setSize( 4 );
+    for( IndexType i = 0; i < inVector.getSize(); i++ )
+        inVector.setElement( i, 2 );
+
+    VectorType outVector;
+    outVector.setSize( 5 );
+    for( IndexType j = 0; j < outVector.getSize(); j++ )
+        outVector.setElement( j, 0 );
+
+
+    m.vectorProduct( inVector, outVector);
+
+    EXPECT_EQ( outVector.getElement( 0 ),  20 );
+    EXPECT_EQ( outVector.getElement( 1 ),  52 );
+    EXPECT_EQ( outVector.getElement( 2 ),  84 );
+    EXPECT_EQ( outVector.getElement( 3 ), 116 );
+    EXPECT_EQ( outVector.getElement( 4 ), 148 );
+}
+
+template< typename Matrix >
+void test_AddMatrix()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 5x4 dense matrix:
+ *
+ *    /  1  2  3  4 \
+ *    |  5  6  7  8 |
+ *    |  9 10 11 12 |
+ *    | 13 14 15 16 |
+ *    \ 17 18 19 20 /
+ */
+    const IndexType rows = 5;
+    const IndexType cols = 4;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++)
+            m.setElement( i, j, value++ );
+
+/*
+ * Sets up the following 5x4 dense matrix:
+ *
+ *    /  1  2  3  4 \
+ *    |  5  6  7  8 |
+ *    |  9 10 11 12 |
+ *    | 13 14 15 16 |
+ *    \ 17 18 19 20 /
+ */
+
+    Matrix m2;
+    m2.reset();
+    m2.setDimensions( rows, cols );
+
+    RealType newValue = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++)
+            m2.setElement( i, j, newValue++ );
+
+    /*
+ * Sets up the following 5x4 dense matrix:
+ *
+ *    /  1  2  3  4 \
+ *    |  5  6  7  8 |
+ *    |  9 10 11 12 |
+ *    | 13 14 15 16 |
+ *    \ 17 18 19 20 /
+ */
+
+    Matrix mResult;
+    mResult.reset();
+    mResult.setDimensions( rows, cols );
+
+    mResult = m;
+
+    RealType matrixMultiplicator = 2;
+    RealType thisMatrixMultiplicator = 1;
+
+    mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator );
+
+    EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) );
+    EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) );
+    EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) );
+    EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) );
+
+    EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) );
+    EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) );
+    EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) );
+    EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) );
+
+    EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) );
+    EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) );
+    EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) );
+    EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) );
+
+    EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) );
+    EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) );
+    EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) );
+    EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) );
+
+    EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) );
+    EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) );
+    EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) );
+    EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) );
+
+    EXPECT_EQ( mResult.getElement( 0, 0 ),  3 );
+    EXPECT_EQ( mResult.getElement( 0, 1 ),  6 );
+    EXPECT_EQ( mResult.getElement( 0, 2 ),  9 );
+    EXPECT_EQ( mResult.getElement( 0, 3 ), 12 );
+
+    EXPECT_EQ( mResult.getElement( 1, 0 ), 15 );
+    EXPECT_EQ( mResult.getElement( 1, 1 ), 18 );
+    EXPECT_EQ( mResult.getElement( 1, 2 ), 21 );
+    EXPECT_EQ( mResult.getElement( 1, 3 ), 24 );
+
+    EXPECT_EQ( mResult.getElement( 2, 0 ), 27 );
+    EXPECT_EQ( mResult.getElement( 2, 1 ), 30 );
+    EXPECT_EQ( mResult.getElement( 2, 2 ), 33 );
+    EXPECT_EQ( mResult.getElement( 2, 3 ), 36 );
+
+    EXPECT_EQ( mResult.getElement( 3, 0 ), 39 );
+    EXPECT_EQ( mResult.getElement( 3, 1 ), 42 );
+    EXPECT_EQ( mResult.getElement( 3, 2 ), 45 );
+    EXPECT_EQ( mResult.getElement( 3, 3 ), 48 );
+
+    EXPECT_EQ( mResult.getElement( 4, 0 ), 51 );
+    EXPECT_EQ( mResult.getElement( 4, 1 ), 54 );
+    EXPECT_EQ( mResult.getElement( 4, 2 ), 57 );
+    EXPECT_EQ( mResult.getElement( 4, 3 ), 60 );
+}
+
+template< typename Matrix >
+void test_GetMatrixProduct()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 5x4 dense matrix:
+ *
+ *    /  1  2  3  4 \
+ *    |  5  6  7  8 |
+ *    |  9 10 11 12 |
+ *    | 13 14 15 16 |
+ *    \ 17 18 19 20 /
+ */
+    const IndexType leftRows = 5;
+    const IndexType leftCols = 4;
+
+    Matrix leftMatrix;
+    leftMatrix.reset();
+    leftMatrix.setDimensions( leftRows, leftCols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < leftRows; i++ )
+        for( IndexType j = 0; j < leftCols; j++)
+            leftMatrix.setElement( i, j, value++ );
+
+/*
+ * Sets up the following 4x5 dense matrix:
+ *
+ *    /  1  2  3  4  5 \
+ *    |  6  7  8  9 10 |
+ *    | 11 12 13 14 15 |
+ *    \ 16 17 18 19 20 /
+ */
+    const IndexType rightRows = 4;
+    const IndexType rightCols = 5;
+
+    Matrix rightMatrix;
+    rightMatrix.reset();
+    rightMatrix.setDimensions( rightRows, rightCols );
+
+    RealType newValue = 1;
+    for( IndexType i = 0; i < rightRows; i++ )
+        for( IndexType j = 0; j < rightCols; j++)
+            rightMatrix.setElement( i, j, newValue++ );
+
+/*
+ * Sets up the following 5x5 resulting dense matrix:
+ *
+ *    /  0  0  0  0 \
+ *    |  0  0  0  0 |
+ *    |  0  0  0  0 |
+ *    |  0  0  0  0 |
+ *    \  0  0  0  0 /
+ */
+
+    Matrix mResult;
+    mResult.reset();
+    mResult.setDimensions( leftRows, rightCols );
+    mResult.setValue( 0 );
+
+    RealType leftMatrixMultiplicator = 1;
+    RealType rightMatrixMultiplicator = 2;
+/*
+ *      /  1  2  3  4 \                            /  220  240  260  280  300 \
+ *      |  5  6  7  8 |       /  1  2  3  4  5 \   |  492  544  596  648  700 |
+ *  1 * |  9 10 11 12 | * 2 * |  6  7  8  9 10 | = |  764  848  932 1016 1100 |
+ *      | 13 14 15 16 |       | 11 12 13 14 15 |   | 1036 1152 1268 1384 1500 |
+ *      \ 17 18 19 20 /       \ 16 17 18 19 20 /   \ 1308 1456 1604 1752 1900 /
+ */
+
+    mResult.getMatrixProduct( leftMatrix, rightMatrix, leftMatrixMultiplicator, rightMatrixMultiplicator );
+
+    EXPECT_EQ( mResult.getElement( 0, 0 ),  220 );
+    EXPECT_EQ( mResult.getElement( 0, 1 ),  240 );
+    EXPECT_EQ( mResult.getElement( 0, 2 ),  260 );
+    EXPECT_EQ( mResult.getElement( 0, 3 ),  280 );
+    EXPECT_EQ( mResult.getElement( 0, 4 ),  300 );
+
+    EXPECT_EQ( mResult.getElement( 1, 0 ),  492 );
+    EXPECT_EQ( mResult.getElement( 1, 1 ),  544 );
+    EXPECT_EQ( mResult.getElement( 1, 2 ),  596 );
+    EXPECT_EQ( mResult.getElement( 1, 3 ),  648 );
+    EXPECT_EQ( mResult.getElement( 1, 4 ),  700 );
+
+    EXPECT_EQ( mResult.getElement( 2, 0 ),  764 );
+    EXPECT_EQ( mResult.getElement( 2, 1 ),  848 );
+    EXPECT_EQ( mResult.getElement( 2, 2 ),  932 );
+    EXPECT_EQ( mResult.getElement( 2, 3 ), 1016 );
+    EXPECT_EQ( mResult.getElement( 2, 4 ), 1100 );
+
+    EXPECT_EQ( mResult.getElement( 3, 0 ), 1036 );
+    EXPECT_EQ( mResult.getElement( 3, 1 ), 1152 );
+    EXPECT_EQ( mResult.getElement( 3, 2 ), 1268 );
+    EXPECT_EQ( mResult.getElement( 3, 3 ), 1384 );
+    EXPECT_EQ( mResult.getElement( 3, 4 ), 1500 );
+
+    EXPECT_EQ( mResult.getElement( 4, 0 ), 1308 );
+    EXPECT_EQ( mResult.getElement( 4, 1 ), 1456 );
+    EXPECT_EQ( mResult.getElement( 4, 2 ), 1604 );
+    EXPECT_EQ( mResult.getElement( 4, 3 ), 1752 );
+    EXPECT_EQ( mResult.getElement( 4, 4 ), 1900 );
+}
+
+template< typename Matrix >
+void test_GetTransposition()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 3x2 dense matrix:
+ *
+ *    /  1  2 \
+ *    |  3  4 |
+ *    \  5  6 /
+ */
+    const IndexType rows = 3;
+    const IndexType cols = 2;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++ )
+            m.setElement( i, j, value++ );
+
+    m.print( std::cout );
+
+/*
+ * Sets up the following 2x3 dense matrix:
+ *
+ *    /  0  0  0 \
+ *    \  0  0  0 /
+ */
+    Matrix mTransposed;
+    mTransposed.reset();
+    mTransposed.setDimensions( cols, rows );
+
+    mTransposed.print( std::cout );
+
+    RealType matrixMultiplicator = 1;
+
+    mTransposed.getTransposition( m, matrixMultiplicator );
+
+    mTransposed.print( std::cout );
+
+/*
+ * Should result in the following 2x3 dense matrix:
+ *
+ *    /  1  3  5 \
+ *    \  2  4  6 /
+ */
+
+    EXPECT_EQ( mTransposed.getElement( 0, 0 ), 1 );
+    EXPECT_EQ( mTransposed.getElement( 0, 1 ), 3 );
+    EXPECT_EQ( mTransposed.getElement( 0, 2 ), 5 );
+
+    EXPECT_EQ( mTransposed.getElement( 1, 0 ), 2 );
+    EXPECT_EQ( mTransposed.getElement( 1, 1 ), 4 );
+    EXPECT_EQ( mTransposed.getElement( 1, 2 ), 6 );
+}
+
+
+template< typename Matrix >
+void test_PerformSORIteration()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 4x4 dense matrix:
+ *
+ *    /  4  1  1  1 \
+ *    |  1  4  1  1 |
+ *    |  1  1  4  1 |
+ *    \  1  1  1  4 /
+ */
+    const IndexType rows = 4;
+    const IndexType cols = 4;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    m.setElement( 0, 0, 4.0 );        // 0th row
+    m.setElement( 0, 1, 1.0 );
+    m.setElement( 0, 2, 1.0 );
+    m.setElement( 0, 3, 1.0 );
+
+    m.setElement( 1, 0, 1.0 );        // 1st row
+    m.setElement( 1, 1, 4.0 );
+    m.setElement( 1, 2, 1.0 );
+    m.setElement( 1, 3, 1.0 );
+
+    m.setElement( 2, 0, 1.0 );
+    m.setElement( 2, 1, 1.0 );        // 2nd row
+    m.setElement( 2, 2, 4.0 );
+    m.setElement( 2, 3, 1.0 );
+
+    m.setElement( 3, 0, 1.0 );        // 3rd row
+    m.setElement( 3, 1, 1.0 );
+    m.setElement( 3, 2, 1.0 );
+    m.setElement( 3, 3, 4.0 );
+
+    RealType bVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 };
+    RealType xVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 };
+
+    IndexType row = 0;
+    RealType omega = 1;
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], -0.5 );
+    EXPECT_EQ( xVector[ 1 ],  1.0 );
+    EXPECT_EQ( xVector[ 2 ],  1.0 );
+    EXPECT_EQ( xVector[ 3 ],  1.0 );
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], -0.5 );
+    EXPECT_EQ( xVector[ 1 ], -0.125 );
+    EXPECT_EQ( xVector[ 2 ],  1.0 );
+    EXPECT_EQ( xVector[ 3 ],  1.0 );
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], -0.5 );
+    EXPECT_EQ( xVector[ 1 ], -0.125 );
+    EXPECT_EQ( xVector[ 2 ],  0.15625 );
+    EXPECT_EQ( xVector[ 3 ],  1.0 );
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], -0.5 );
+    EXPECT_EQ( xVector[ 1 ], -0.125 );
+    EXPECT_EQ( xVector[ 2 ], 0.15625 );
+    EXPECT_EQ( xVector[ 3 ], 0.3671875 );
+}
+
+template< typename Matrix >
+void test_AssignmentOperator()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >;
+   using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   MultidiagonalHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j <= i; j++ )
+         hostMatrix.setElement( i, j,  i + j );
+
+   Matrix matrix( rows, columns );
+   matrix.getValues() = 0.0;
+   matrix = hostMatrix;
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+
+#ifdef HAVE_CUDA
+   MultidiagonalCuda cudaMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j <= i; j++ )
+         cudaMatrix.setElement( i, j, i + j );
+
+   matrix.getValues() = 0.0;
+   matrix = cudaMatrix;
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+#endif
+}
+
+
+template< typename Matrix >
+void test_SaveAndLoad()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 4x4 dense matrix:
+ *
+ *    /  1  2  3  4 \
+ *    |  5  6  7  8 |
+ *    |  9 10 11 12 |
+ *    \ 13 14 15 16 /
+ */
+    const IndexType rows = 4;
+    const IndexType cols = 4;
+
+    Matrix savedMatrix;
+    savedMatrix.reset();
+    savedMatrix.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++ )
+            savedMatrix.setElement( i, j, value++ );
+
+    ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) );
+
+    Matrix loadedMatrix;
+    loadedMatrix.reset();
+    loadedMatrix.setDimensions( rows, cols );
+
+    ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) );
+
+    EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
+    EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
+    EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
+    EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
+
+    EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
+    EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
+    EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
+    EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
+
+    EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
+    EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
+    EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
+    EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
+
+    EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
+    EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
+    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
+    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
+
+    EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
+    EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
+    EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
+    EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  4 );
+
+    EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  5 );
+    EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  6 );
+    EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  7 );
+    EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  8 );
+
+    EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  9 );
+    EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 );
+    EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 );
+    EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 );
+
+    EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 13 );
+    EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 14 );
+    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 );
+    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 );
+}
+
+template< typename Matrix >
+void test_Print()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 5x4 sparse matrix:
+ *
+ *    /  1  2  3  4 \
+ *    |  5  6  7  8 |
+ *    |  9 10 11 12 |
+ *    | 13 14 15 16 |
+ *    \ 17 18 19 20 /
+ */
+    const IndexType rows = 5;
+    const IndexType cols = 4;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++)
+        for( IndexType j = 0; j < cols; j++)
+            m.setElement( i, j, value++ );
+
+    #include <sstream>
+    std::stringstream printed;
+    std::stringstream couted;
+
+    //change the underlying buffer and save the old buffer
+    auto old_buf = std::cout.rdbuf(printed.rdbuf());
+
+    m.print( std::cout ); //all the std::cout goes to ss
+
+    std::cout.rdbuf(old_buf); //reset
+
+    couted << "Row: 0 ->  Col:0->1	 Col:1->2	 Col:2->3	 Col:3->4\t\n"
+              "Row: 1 ->  Col:0->5	 Col:1->6	 Col:2->7	 Col:3->8\t\n"
+              "Row: 2 ->  Col:0->9	 Col:1->10	 Col:2->11	 Col:3->12\t\n"
+              "Row: 3 ->  Col:0->13	 Col:1->14	 Col:2->15	 Col:3->16\t\n"
+              "Row: 4 ->  Col:0->17	 Col:1->18	 Col:2->19	 Col:3->20\t\n";
+
+    EXPECT_EQ( printed.str(), couted.str() );
+}
+
+// test fixture for typed tests
+template< typename Matrix >
+class MatrixTest : public ::testing::Test
+{
+protected:
+   using MatrixType = Matrix;
+};
+
+// types for which MatrixTest is instantiated
+using MatrixTypes = ::testing::Types
+<
+    TNL::Matrices::Multidiagonal< int,    TNL::Devices::Host, short >,
+    TNL::Matrices::Multidiagonal< long,   TNL::Devices::Host, short >,
+    TNL::Matrices::Multidiagonal< float,  TNL::Devices::Host, short >,
+    TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, short >,
+    TNL::Matrices::Multidiagonal< int,    TNL::Devices::Host, int >,
+    TNL::Matrices::Multidiagonal< long,   TNL::Devices::Host, int >,
+    TNL::Matrices::Multidiagonal< float,  TNL::Devices::Host, int >,
+    TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, int >,
+    TNL::Matrices::Multidiagonal< int,    TNL::Devices::Host, long >,
+    TNL::Matrices::Multidiagonal< long,   TNL::Devices::Host, long >,
+    TNL::Matrices::Multidiagonal< float,  TNL::Devices::Host, long >,
+    TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, long >
+#ifdef HAVE_CUDA
+    ,TNL::Matrices::Multidiagonal< int,    TNL::Devices::Cuda, short >,
+    TNL::Matrices::Multidiagonal< long,   TNL::Devices::Cuda, short >,
+    TNL::Matrices::Multidiagonal< float,  TNL::Devices::Cuda, short >,
+    TNL::Matrices::Multidiagonal< double, TNL::Devices::Cuda, short >,
+    TNL::Matrices::Multidiagonal< int,    TNL::Devices::Cuda, int >,
+    TNL::Matrices::Multidiagonal< long,   TNL::Devices::Cuda, int >,
+    TNL::Matrices::Multidiagonal< float,  TNL::Devices::Cuda, int >,
+    TNL::Matrices::Multidiagonal< double, TNL::Devices::Cuda, int >,
+    TNL::Matrices::Multidiagonal< int,    TNL::Devices::Cuda, long >,
+    TNL::Matrices::Multidiagonal< long,   TNL::Devices::Cuda, long >,
+    TNL::Matrices::Multidiagonal< float,  TNL::Devices::Cuda, long >,
+    TNL::Matrices::Multidiagonal< double, TNL::Devices::Cuda, long >
+#endif
+>;
+
+TYPED_TEST_SUITE( MatrixTest, MatrixTypes );
+
+TYPED_TEST( MatrixTest, getSerializationType )
+{
+   test_GetSerializationType();
+}
+
+TYPED_TEST( MatrixTest, setDimensionsTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetDimensions< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setLikeTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetLike< MatrixType, MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, getRowLengthTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetRowLength< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, getNumberOfMatrixElementsTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetNumberOfMatrixElements< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetNumberOfNonzeroMatrixElements< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, resetTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_Reset< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setValueTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetValue< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setElementTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetElement< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, addElementTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AddElement< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setRowTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetRow< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, addRowTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AddRow< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, vectorProductTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_VectorProduct< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, addMatrixTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AddMatrix< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, assignmentOperatorTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AssignmentOperator< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, saveAndLoadTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SaveAndLoad< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, printTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_Print< MatrixType >();
+}
+
+//// test_getType is not general enough yet. DO NOT TEST IT YET.
+
+//TEST( MultidiagonalMatrixTest, Multidiagonal_GetTypeTest_Host )
+//{
+//    host_test_GetType< Multidiagonal_host_float, Multidiagonal_host_int >();
+//}
+//
+//#ifdef HAVE_CUDA
+//TEST( MultidiagonalMatrixTest, Multidiagonal_GetTypeTest_Cuda )
+//{
+//    cuda_test_GetType< Multidiagonal_cuda_float, Multidiagonal_cuda_int >();
+//}
+//#endif
+
+/*TEST( MultidiagonalMatrixTest, Multidiagonal_getMatrixProductTest_Host )
+{
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched on CPU, this test will not build, but will print the following message: \n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h(609): error: no instance of function template \"TNL::Matrices::MultidiagonalMatrixProductKernel\" matches the argument list\n";
+    std::cout << "              argument types are: (TNL::Matrices::Multidiagonal<int, TNL::Devices::Host, int> *, Multidiagonal_host_int *, Multidiagonal_host_int *, const int, const int, int, int)\n";
+    std::cout << "          detected during:\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Multidiagonal<Real, Device, Index>::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Multidiagonal<Real, Device, Index>::RealType &, const TNL::Matrices::Multidiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix1=Multidiagonal_host_int, Matrix2=Multidiagonal_host_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(901): here\n";
+    std::cout << "                  instantiation of \"void test_GetMatrixProduct<Matrix>() [with Matrix=Multidiagonal_host_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(1315): here\n\n";
+}
+
+#ifdef HAVE_CUDA
+TEST( MultidiagonalMatrixTest, Multidiagonal_getMatrixProductTest_Cuda )
+{
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched on GPU, this test will not build, but will print the following message: \n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h(510): error: identifier \"tnlCudaMin\" is undefined\n";
+    std::cout << "          detected during:\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::MultidiagonalMatrixProductKernel<Real,Index,Matrix1,Matrix2,tileDim,tileRowBlockSize>(TNL::Matrices::Multidiagonal<Real, TNL::Devices::Cuda, Index> *, const Matrix1 *, const Matrix2 *, Real, Real, Index, Index) [with Real=int, Index=int, Matrix1=Multidiagonal_cuda_int, Matrix2=Multidiagonal_cuda_int, tileDim=32, tileRowBlockSize=8]\"\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Multidiagonal<Real, Device, Index>::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Multidiagonal<Real, Device, Index>::RealType &, const TNL::Matrices::Multidiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Cuda, Index=int, Matrix1=Multidiagonal_cuda_int, Matrix2=Multidiagonal_cuda_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(901): here\n";
+    std::cout << "                  instantiation of \"void test_GetMatrixProduct<Matrix>() [with Matrix=Multidiagonal_cuda_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(1332): here\n\n";
+}
+#endif
+
+TEST( MultidiagonalMatrixTest, Multidiagonal_getTranspositionTest_Host )
+{
+//    test_GetTransposition< Multidiagonal_host_int >();
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched on CPU, this test will not build, but will print the following message: \n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h(836): error: no instance of function template \"TNL::Matrices::MultidiagonalTranspositionAlignedKernel\" matches the argument list\n";
+    std::cout << "              argument types are: (TNL::Matrices::Multidiagonal<int, TNL::Devices::Host, int> *, Multidiagonal_host_int *, const int, int, int)\n";
+    std::cout << "          detected during:\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Multidiagonal<Real, Device, Index>::getTransposition(const Matrix &, const TNL::Matrices::Multidiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Multidiagonal_host_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(977): here\n";
+    std::cout << "                  instantiation of \"void test_GetTransposition<Matrix>() [with Matrix=Multidiagonal_host_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(1420): here\n\n";
+    std::cout << "AND this message: \n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h(852): error: no instance of function template \"TNL::Matrices::MultidiagonalTranspositionNonAlignedKernel\" matches the argument list\n";
+    std::cout << "              argument types are: (TNL::Matrices::Multidiagonal<int, TNL::Devices::Host, int> *, Multidiagonal_host_int *, const int, int, int)\n";
+    std::cout << "          detected during:\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Multidiagonal<Real, Device, Index>::getTransposition(const Matrix &, const TNL::Matrices::Multidiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Multidiagonal_host_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(977): here\n";
+    std::cout << "                  instantiation of \"void test_GetTransposition<Matrix>() [with Matrix=Multidiagonal_host_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(1420): here\n\n";
+}
+
+#ifdef HAVE_CUDA
+TEST( MultidiagonalMatrixTest, Multidiagonal_getTranspositionTest_Cuda )
+{
+//    test_GetTransposition< Multidiagonal_cuda_int >();
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched on GPU, this test throws the following message: \n";
+    std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!!\n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h \n";
+    std::cout << "      Line: 329 \n";
+    std::cout << "      Diagnostics: Not supported with CUDA.\n";
+    std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h \n";
+    std::cout << "      Line: 329 \n";
+    std::cout << "      Diagnostics: Not supported with CUDA.\n";
+    std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h \n";
+    std::cout << "      Line: 329 \n";
+    std::cout << "      Diagnostics: Not supported with CUDA.\n";
+    std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h \n";
+    std::cout << "      Line: 329 \n";
+    std::cout << "      Diagnostics: Not supported with CUDA.\n";
+    std::cout << "  terminate called after throwing an instance of 'TNL::Exceptions::CudaRuntimeError'\n";
+    std::cout << "          what():  CUDA ERROR 4 (cudaErrorLaunchFailure): unspecified launch failure.\n";
+    std::cout << "  Source: line 57 in /home/lukas/tnl-dev/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h: unspecified launch failure\n";
+    std::cout << "  [1]    4003 abort (core dumped)  ./MultidiagonalMatrixTest-dbg\n";
+}
+#endif
+
+TEST( MultidiagonalMatrixTest, Multidiagonal_performSORIterationTest_Host )
+{
+    test_PerformSORIteration< Multidiagonal_host_float >();
+}
+
+#ifdef HAVE_CUDA
+TEST( MultidiagonalMatrixTest, Multidiagonal_performSORIterationTest_Cuda )
+{
+//    test_PerformSORIteration< Multidiagonal_cuda_float >();
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched, this test throws the following message: \n";
+    std::cout << "      [1]    6992 segmentation fault (core dumped)  ./SparseMatrixTest-dbg\n\n";
+    std::cout << "\n THIS IS NOT IMPLEMENTED FOR CUDA YET!!\n\n";
+}
+#endif
+ * */
+
+#endif // HAVE_GTEST
+
+#include "../main.h"
-- 
GitLab


From 000546f73ab34bc3705d7e85522c8af3a247afea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 10 Jan 2020 22:39:19 +0100
Subject: [PATCH 081/179] Added a method ArrayView::copy for shallow copy.

---
 src/TNL/Containers/ArrayView.h   |  9 +++++++++
 src/TNL/Containers/ArrayView.hpp | 13 +++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h
index c06ad56dc..b4e063b7e 100644
--- a/src/TNL/Containers/ArrayView.h
+++ b/src/TNL/Containers/ArrayView.h
@@ -237,6 +237,15 @@ public:
              typename = std::enable_if_t< std::is_convertible< T, ValueType >::value || IsArrayType< T >::value > >
    ArrayView& operator=( const T& array );
 
+   /**
+    * \brief Shallow copy of the array view
+    * 
+    * \param view Reference to the source array view.
+    * \return Reference to this array view.
+    */
+   __cuda_callable__
+   ArrayView& copy( const ArrayView& view );
+
    /**
     * \brief Swaps this array view with another.
     *
diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp
index c3c39bc10..4ef8ac3f6 100644
--- a/src/TNL/Containers/ArrayView.hpp
+++ b/src/TNL/Containers/ArrayView.hpp
@@ -118,6 +118,19 @@ operator=( const T& data )
    return *this;
 }
 
+template< typename Value,
+           typename Device,
+           typename Index >
+__cuda_callable__
+ArrayView< Value, Device, Index >&
+ArrayView< Value, Device, Index >::
+copy( const ArrayView& view )
+{
+   data = view.data;
+   size = view.size;
+   return *this;
+}
+
 template< typename Value,
           typename Device,
           typename Index >
-- 
GitLab


From c799fd4ef3af47f174a45cb8a495ab6208a6fe9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 10 Jan 2020 22:40:11 +0100
Subject: [PATCH 082/179] Fixing tridiagonal matrix.

---
 src/TNL/Matrices/Matrix.h                     |    2 +-
 src/TNL/Matrices/Matrix.hpp                   |    2 +-
 src/TNL/Matrices/MatrixView.h                 |   13 +-
 src/TNL/Matrices/MatrixView.hpp               |   14 +-
 src/TNL/Matrices/Tridiagonal.h                |    6 +-
 src/TNL/Matrices/Tridiagonal.hpp              |  181 +--
 src/TNL/Matrices/TridiagonalMatrixView.h      |   11 -
 src/TNL/Matrices/TridiagonalMatrixView.hpp    |  147 +--
 .../details/TridiagonalMatrixIndexer.h        |   20 +-
 src/UnitTests/Matrices/DenseMatrixTest.h      |    6 +-
 .../Matrices/MultidiagonalMatrixTest.h        |    2 +-
 .../Matrices/TridiagonalMatrixTest.h          | 1105 +++++++++--------
 12 files changed, 743 insertions(+), 766 deletions(-)

diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index a9b458d7b..7813fa962 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -64,7 +64,7 @@ public:
    template< typename Matrix_ >
    void setLike( const Matrix_& matrix );
 
-   IndexType getNumberOfMatrixElements() const;
+   IndexType getAllocatedElementsCount() const;
 
    virtual IndexType getNumberOfNonzeroMatrixElements() const = 0;
 
diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp
index 29226cb00..efd26e1fa 100644
--- a/src/TNL/Matrices/Matrix.hpp
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -91,7 +91,7 @@ template< typename Real,
           typename Device,
           typename Index,
           typename RealAllocator >
-Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfMatrixElements() const
+Index Matrix< Real, Device, Index, RealAllocator >::getAllocatedElementsCount() const
 {
    return this->values.getSize();
 }
diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h
index 76965e511..b8adfd791 100644
--- a/src/TNL/Matrices/MatrixView.h
+++ b/src/TNL/Matrices/MatrixView.h
@@ -57,12 +57,10 @@ public:
 
    virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
 
-   IndexType getNumberOfMatrixElements() const;
+   IndexType getAllocatedElementsCount() const;
 
    virtual IndexType getNumberOfNonzeroMatrixElements() const;
 
-   void reset();
-
    __cuda_callable__
    IndexType getRows() const;
 
@@ -91,6 +89,15 @@ public:
 
    ValuesView& getValues();
 
+   /**
+    * \brief Shallow copy of the matrix view.
+    *
+    * @param view
+    * @return 
+    */
+   __cuda_callable__
+   MatrixView& operator=( const MatrixView& view );
+   
    // TODO: parallelize and optimize for sparse matrices
    template< typename Matrix >
    bool operator == ( const Matrix& matrix ) const;
diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp
index bd3d9beae..b2739ae1d 100644
--- a/src/TNL/Matrices/MatrixView.hpp
+++ b/src/TNL/Matrices/MatrixView.hpp
@@ -64,7 +64,7 @@ void MatrixView< Real, Device, Index >::getCompressedRowLengths( CompressedRowLe
 template< typename Real,
           typename Device,
           typename Index >
-Index MatrixView< Real, Device, Index >::getNumberOfMatrixElements() const
+Index MatrixView< Real, Device, Index >::getAllocatedElementsCount() const
 {
    return this->values.getSize();
 }
@@ -118,15 +118,17 @@ getValues()
 {
    return this->values;
 }
-
 template< typename Real,
           typename Device,
           typename Index >
-void MatrixView< Real, Device, Index >::reset()
+__cuda_callable__
+MatrixView< Real, Device, Index >& 
+MatrixView< Real, Device, Index >::
+operator=( const MatrixView& view )
 {
-   this->rows = 0;
-   this->columns = 0;
-   this->values.reset();
+   rows = view.rows;
+   columns = view.columns;
+   values.copy( view.values );
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h
index 51e05c899..d28270156 100644
--- a/src/TNL/Matrices/Tridiagonal.h
+++ b/src/TNL/Matrices/Tridiagonal.h
@@ -79,12 +79,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
       template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
       void setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m );
 
-      IndexType getNumberOfMatrixElements() const;
-
       IndexType getNumberOfNonzeroMatrixElements() const;
 
-      IndexType getMaxRowlength() const;
-
       void reset();
 
       template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
@@ -179,6 +175,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
                                  const IndexType localIdx ) const;
 
       IndexerType indexer;
+
+      ViewType view;
 };
 
 } // namespace Matrices
diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp
index a7178f86e..c6d359d3b 100644
--- a/src/TNL/Matrices/Tridiagonal.hpp
+++ b/src/TNL/Matrices/Tridiagonal.hpp
@@ -10,6 +10,7 @@
 
 #pragma once
 
+#include <sstream>
 #include <TNL/Assert.h>
 #include <TNL/Matrices/Tridiagonal.h>
 #include <TNL/Exceptions/NotImplementedError.h>
@@ -105,6 +106,7 @@ setDimensions( const IndexType rows, const IndexType columns )
    this->indexer.setDimensions( rows, columns );
    this->values.setSize( this->indexer.getStorageSize() );
    this->values = 0.0;
+   this->view = this->getView();
 }
 
 template< typename Real,
@@ -138,20 +140,25 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-Index
+   template< typename Vector >
+void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
-getRowLength( const IndexType row ) const
+getCompressedRowLengths( Vector& rowLengths ) const
 {
-   const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
-   if( row == 0 )
-      return 2;
-   if( row > 0 && row < diagonalLength - 1 )
-      return 3;
-   if( this->getRows() > this->getColumns() )
-      return 1;
-   if( this->getRows() == this->getColumns() )
-      return 2;
-   return 3;
+   return this->view.getCompressedRowLengths( rowLengths );
+   /*rowLengths.setSize( this->getRows() );
+   rowLengths = 0;
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   this->allRowsReduction( fetch, reduce, keep, 0 );*/
 }
 
 template< typename Real,
@@ -161,9 +168,10 @@ template< typename Real,
           typename RealAllocator >
 Index
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
-getMaxRowLength() const
+getRowLength( const IndexType row ) const
 {
-   return 3;
+   return this->view.getRowLength( row );
+   //return this->indexer.getRowSize( row );
 }
 
 template< typename Real,
@@ -171,12 +179,11 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
-void
+Index
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
-setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m )
+getMaxRowLength() const
 {
-   this->setDimensions( m.getRows(), m.getColumns() );
+   return this->view.getMaxRowLength();
 }
 
 template< typename Real,
@@ -184,11 +191,12 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-Index
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
-getNumberOfMatrixElements() const
+setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m )
 {
-   return 3 * min( this->getRows(), this->getColumns() );
+   this->setDimensions( m.getRows(), m.getColumns() );
 }
 
 template< typename Real,
@@ -200,23 +208,12 @@ Index
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getNumberOfNonzeroMatrixElements() const
 {
-   const auto values_view = this->values.getConstView();
+   return this->view.getNumberOfNonzeroMatrixElements();
+   /*const auto values_view = this->values.getConstView();
    auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
       return ( values_view[ i ] != 0.0 );
    };
-   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
-Index
-Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
-getMaxRowlength() const
-{
-   return 3;
+   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );*/
 }
 
 template< typename Real,
@@ -272,7 +269,7 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 setValue( const RealType& v )
 {
-   this->values = v;
+   this->view.setValue( v );
 }
 
 template< typename Real,
@@ -285,7 +282,8 @@ auto
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getRow( const IndexType& rowIdx ) const -> const RowView
 {
-   return RowView( this->values.getView(), this->indexer );
+   return this->view.getRow( rowIdx );
+   //return RowView( this->values.getView(), this->indexer );
 }
 
 template< typename Real,
@@ -298,7 +296,8 @@ auto
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getRow( const IndexType& rowIdx ) -> RowView
 {
-   return RowView( this->values.getView(), this->indexer );
+   return this->view.getRow( rowIdx );
+   //return RowView( this->values.getView(), this->indexer );
 }
 
 template< typename Real,
@@ -310,14 +309,19 @@ bool
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 setElement( const IndexType row, const IndexType column, const RealType& value )
 {
-   TNL_ASSERT_GE( row, 0, "" );
+   return this->view.setElement( row, column, value );
+   /*TNL_ASSERT_GE( row, 0, "" );
    TNL_ASSERT_LT( row, this->getRows(), "" );
    TNL_ASSERT_GE( column, 0, "" );
    TNL_ASSERT_LT( column, this->getColumns(), "" );
    if( abs( row - column ) > 1 )
-      throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." );
+   {
+      std::stringstream msg;
+      msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in tridiagonal matrix.";
+      throw std::logic_error( msg.str() );
+   }
    this->values.setElement( this->getElementIndex( row, column ), value );
-   return true;
+   return true;*/
 }
 
 template< typename Real,
@@ -332,15 +336,20 @@ addElement( const IndexType row,
             const RealType& value,
             const RealType& thisElementMultiplicator )
 {
-   TNL_ASSERT_GE( row, 0, "" );
+   return this->view.addElement( row, column, value, thisElementMultiplicator );
+   /*TNL_ASSERT_GE( row, 0, "" );
    TNL_ASSERT_LT( row, this->getRows(), "" );
    TNL_ASSERT_GE( column, 0, "" );
    TNL_ASSERT_LT( column, this->getColumns(), "" );
    if( abs( row - column ) > 1 )
-      throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." );
+   {
+      std::stringstream msg;
+      msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in tridiagonal matrix.";
+      throw std::logic_error( msg.str() );
+   }
    const Index i = this->getElementIndex( row, column );
    this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value );
-   return true;
+   return true;*/
 }
 
 template< typename Real,
@@ -352,14 +361,15 @@ Real
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getElement( const IndexType row, const IndexType column ) const
 {
-   TNL_ASSERT_GE( row, 0, "" );
+   return this->view.getElement( row, column );
+   /*TNL_ASSERT_GE( row, 0, "" );
    TNL_ASSERT_LT( row, this->getRows(), "" );
    TNL_ASSERT_GE( column, 0, "" );
    TNL_ASSERT_LT( column, this->getColumns(), "" );
 
    if( abs( column - row ) > 1 )
       return 0.0;
-   return this->values.getElement( this->getElementIndex( row, column ) );
+   return this->values.getElement( this->getElementIndex( row, column ) );*/
 }
 
 template< typename Real,
@@ -372,46 +382,40 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
+   this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
+   /*using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) );
    const auto values_view = this->values.getConstView();
-   const auto indexer_ = this->indexer;
-   const auto rows = this->getRows();
-   const auto columns = this->getColumns();
-   const auto size = this->size;
+   const auto indexer = this->indexer;
+   const auto zero = zero_;
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      //bool compute;
+      Real_ sum( zero );
       if( rowIdx == 0 )
       {
-         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
-         keep( 0, reduce( fetch( 0, 0, i_0, values_view[ i_0 ] ),
-                          fetch( 0, 1, i_1, values_view[ i_1 ] ) ) );
+         reduce( sum, fetch( 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ) );
+         reduce( sum, fetch( 0, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ) );
+         keep( 0, sum );
          return;
       }
-      if( rowIdx < size || columns > rows )
+      if( rowIdx < indexer.getSize() || indexer.getColumns() > indexer.getRows() )
       {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
-
-         keep( rowIdx, reduce( reduce( fetch( rowIdx, rowIdx - 1, i_0, values_view[ i_0 ] ),
-                                       fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ),
-                               fetch( rowIdx, rowIdx + 1, i_2, values_view[ i_2] ) ) );
+         reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
+         reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
+         reduce( sum, fetch( rowIdx, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ) );
+         keep( rowIdx, sum );
          return;
       }
-      if( rows == columns )
+      if( indexer.getRows() == indexer.getColumns() )
       {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         keep( rowIdx, reduce( fetch( rowIdx, rowIdx - 1, i_0, values_view[ i_0 ] ),
-                               fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ) );
+         reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
+         reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
+         keep( rowIdx, sum );
       }
       else
       {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         keep( rowIdx, fetch( rowIdx, rowIdx, i_0, values_view[ i_0 ] ) );
+         keep( rowIdx, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
       }
    };
-   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );*/
 }
 
 template< typename Real,
@@ -424,7 +428,7 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+   this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -437,7 +441,8 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 forRows( IndexType first, IndexType last, Function& function ) const
 {
-   const auto values_view = this->values.getConstView();
+   this->view.forRows( first, last, function );
+   /*const auto values_view = this->values.getConstView();
    const auto indexer_ = this->indexer;
    const auto rows = this->getRows();
    const auto columns = this->getColumns();
@@ -475,7 +480,7 @@ forRows( IndexType first, IndexType last, Function& function ) const
          function( rowIdx, 0, rowIdx, values_view[ i_0 ] );
       }
    };
-   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );*/
 }
 
 template< typename Real,
@@ -488,7 +493,8 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 forRows( IndexType first, IndexType last, Function& function )
 {
-   const auto values_view = this->values.getConstView();
+   this->view.forRows( first, last, function );
+   /*const auto values_view = this->values.getConstView();
    const auto indexer_ = this->indexer;
    const auto rows = this->getRows();
    const auto columns = this->getColumns();
@@ -526,7 +532,7 @@ forRows( IndexType first, IndexType last, Function& function )
          function( rowIdx, 0, rowIdx, values_view[ i_0 ] );
       }
    };
-   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );*/
 }
 
 template< typename Real,
@@ -539,7 +545,7 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 forAllRows( Function& function ) const
 {
-   this->forRows( 0, this->getRows(), function );
+   this->view.forRows( 0, this->getRows(), function );
 }
 
 template< typename Real,
@@ -552,7 +558,7 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 forAllRows( Function& function )
 {
-   this->forRows( 0, this->getRows(), function );
+   this->view.forRows( 0, this->getRows(), function );
 }
 
 template< typename Real,
@@ -566,11 +572,12 @@ typename Vector::RealType
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 rowVectorProduct( const IndexType row, const Vector& vector ) const
 {
-   return TridiagonalDeviceDependentCode< Device >::
+   return this->view.rowVectorProduct();
+   /*return TridiagonalDeviceDependentCode< Device >::
              rowVectorProduct( this->rows,
                                this->values,
                                row,
-                               vector );
+                               vector );*/
 }
 
 template< typename Real,
@@ -584,12 +591,13 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 vectorProduct( const InVector& inVector, OutVector& outVector ) const
 {
-   TNL_ASSERT( this->getColumns() == inVector.getSize(),
+   this->view.vectorProduct( inVector, outVector );
+   /*TNL_ASSERT( this->getColumns() == inVector.getSize(),
             std::cerr << "Matrix columns: " << this->getColumns() << std::endl
                  << "Vector size: " << inVector.getSize() << std::endl );
    TNL_ASSERT( this->getRows() == outVector.getSize(),
                std::cerr << "Matrix rows: " << this->getRows() << std::endl
-                    << "Vector size: " << outVector.getSize() << std::endl );
+                    << "Vector size: " << outVector.getSize() << std::endl );*/
 
    //DeviceDependentCode::vectorProduct( *this, inVector, outVector );
 }
@@ -815,12 +823,15 @@ template< typename Real,
           typename RealAllocator >
 __cuda_callable__
 Index Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
-getElementIndex( const IndexType row, const IndexType localIdx ) const
+getElementIndex( const IndexType row, const IndexType column ) const
 {
-   TNL_ASSERT_GE( row, 0, "" );
-   TNL_ASSERT_LT( row, this->getRows(), "" );
+   IndexType localIdx = column - row;
+   if( row > 0 )
+      localIdx++;
+
    TNL_ASSERT_GE( localIdx, 0, "" );
    TNL_ASSERT_LT( localIdx, 3, "" );
+
    return this->indexer.getGlobalIndex( row, localIdx );
 }
 
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h
index 05f7663c9..78593acf5 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.h
+++ b/src/TNL/Matrices/TridiagonalMatrixView.h
@@ -70,12 +70,8 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
 
       IndexType getMaxRowLength() const;
 
-      IndexType getNumberOfMatrixElements() const;
-
       IndexType getNumberOfNonzeroMatrixElements() const;
 
-      IndexType getMaxRowlength() const;
-
       template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
       bool operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;
 
@@ -144,13 +140,6 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
                                 Vector2& x,
                                 const RealType& omega = 1.0 ) const;
 
-      // copy assignment
-      TridiagonalMatrixView& operator=( const TridiagonalMatrixView& matrix );
-
-      // cross-device copy assignment
-      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
-      TridiagonalMatrixView& operator=( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix );
-
       void save( File& file ) const;
 
       void save( const String& fileName ) const;
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp
index ef893295e..83ff6035d 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp
@@ -87,31 +87,36 @@ template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder >
-Index
+   template< typename Vector >
+void
 TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
-getRowLength( const IndexType row ) const
+getCompressedRowLengths( Vector& rowLengths ) const
 {
-   const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
-   if( row == 0 )
-      return 2;
-   if( row > 0 && row < diagonalLength - 1 )
-      return 3;
-   if( this->getRows() > this->getColumns() )
-      return 1;
-   if( this->getRows() == this->getColumns() )
-      return 2;
-   return 3;
+   rowLengths.setSize( this->getRows() );
+   rowLengths = 0;
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   this->allRowsReduction( fetch, reduce, keep, 0 );
 }
 
+
 template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder >
 Index
 TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
-getMaxRowLength() const
+getRowLength( const IndexType row ) const
 {
-   return 3;
+   return this->indexer.getRowSize( row );
 }
 
 template< typename Real,
@@ -120,9 +125,9 @@ template< typename Real,
           bool RowMajorOrder >
 Index
 TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
-getNumberOfMatrixElements() const
+getMaxRowLength() const
 {
-   return 3 * min( this->getRows(), this->getColumns() );
+   return 3;
 }
 
 template< typename Real,
@@ -140,17 +145,6 @@ getNumberOfNonzeroMatrixElements() const
    return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder >
-Index
-TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
-getMaxRowlength() const
-{
-   return 3;
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -228,7 +222,11 @@ setElement( const IndexType row, const IndexType column, const RealType& value )
    TNL_ASSERT_GE( column, 0, "" );
    TNL_ASSERT_LT( column, this->getColumns(), "" );
    if( abs( row - column ) > 1 )
-      throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." );
+   {
+      std::stringstream msg;
+      msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in tridiagonal matrix.";
+      throw std::logic_error( msg.str() );
+   }
    this->values.setElement( this->getElementIndex( row, column ), value );
    return true;
 }
@@ -249,7 +247,11 @@ addElement( const IndexType row,
    TNL_ASSERT_GE( column, 0, "" );
    TNL_ASSERT_LT( column, this->getColumns(), "" );
    if( abs( row - column ) > 1 )
-      throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." );
+   {
+      std::stringstream msg;
+      msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in tridiagonal matrix.";
+      throw std::logic_error( msg.str() );
+   }
    const Index i = this->getElementIndex( row, column );
    this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value );
    return true;
@@ -280,45 +282,38 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
-rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ ) const
 {
+   using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) );
    const auto values_view = this->values.getConstView();
-   const auto indexer_ = this->indexer;
-   const auto rows = this->getRows();
-   const auto columns = this->getColumns();
-   const auto size = this->size;
+   const auto indexer = this->indexer;
+   const auto zero = zero_;
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      //bool compute;
+      Real_ sum( zero );
       if( rowIdx == 0 )
       {
-         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
-         keep( 0, reduce( fetch( 0, 0, i_0, values_view[ i_0 ] ),
-                          fetch( 0, 1, i_1, values_view[ i_1 ] ) ) );
+         reduce( sum, fetch( 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ) );
+         reduce( sum, fetch( 0, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ) );
+         keep( 0, sum );
          return;
       }
-      if( rowIdx < size || columns > rows )
+      if( rowIdx < indexer.getSize() || indexer.getColumns() > indexer.getRows() )
       {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
-
-         keep( rowIdx, reduce( reduce( fetch( rowIdx, rowIdx - 1, i_0, values_view[ i_0 ] ),
-                                       fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ),
-                               fetch( rowIdx, rowIdx + 1, i_2, values_view[ i_2] ) ) );
+         reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
+         reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
+         reduce( sum, fetch( rowIdx, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ) );
+         keep( rowIdx, sum );
          return;
       }
-      if( rows == columns )
+      if( indexer.getRows() == indexer.getColumns() )
       {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         keep( rowIdx, reduce( fetch( rowIdx, rowIdx - 1, i_0, values_view[ i_0 ] ),
-                               fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ) );
+         reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
+         reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
+         keep( rowIdx, sum );
       }
       else
       {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         keep( rowIdx, fetch( rowIdx, rowIdx, i_0, values_view[ i_0 ] ) );
+         keep( rowIdx, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
       }
    };
    Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
@@ -613,41 +608,6 @@ performSORIteration( const Vector1& b,
 }
 
 
-// copy assignment
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder >
-TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >&
-TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
-operator=( const TridiagonalMatrixView& matrix )
-{
-   this->setLike( matrix );
-   this->values = matrix.values;
-   return *this;
-}
-
-// cross-device copy assignment
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder >
-   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
-TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >&
-TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
-operator=( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix )
-{
-   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
-                  "unknown device" );
-   static_assert( std::is_same< Device_, Devices::Host >::value || std::is_same< Device_, Devices::Cuda >::value,
-                  "unknown device" );
-
-   this->setLike( matrix );
-
-   throw Exceptions::NotImplementedError("Cross-device assignment for the Tridiagonal format is not implemented yet.");
-}
-
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -690,12 +650,15 @@ template< typename Real,
           bool RowMajorOrder >
 __cuda_callable__
 Index TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
-getElementIndex( const IndexType row, const IndexType localIdx ) const
+getElementIndex( const IndexType row, const IndexType column ) const
 {
-   TNL_ASSERT_GE( row, 0, "" );
-   TNL_ASSERT_LT( row, this->getRows(), "" );
+   IndexType localIdx = column - row;
+   if( row > 0 )
+      localIdx++;
+
    TNL_ASSERT_GE( localIdx, 0, "" );
    TNL_ASSERT_LT( localIdx, 3, "" );
+
    return this->indexer.getGlobalIndex( row, localIdx );
 }
 
diff --git a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
index 2f245c38f..d9fdd0c23 100644
--- a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
+++ b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
@@ -26,21 +26,21 @@ class TridiagonalMatrixIndexer
 
       __cuda_callable__
       TridiagonalMatrixIndexer()
-      : rows( 0 ), columns( 0 ), size( 0 ){};
+      : rows( 0 ), columns( 0 ), nonEmptyRows( 0 ){};
 
       __cuda_callable__
       TridiagonalMatrixIndexer( const IndexType& rows, const IndexType& columns )
-      : rows( rows ), columns( columns ), size( TNL::min( rows, columns ) ) {};
+      : rows( rows ), columns( columns ), nonEmptyRows( TNL::min( rows, columns ) + ( rows > columns ) ) {};
 
       __cuda_callable__
       TridiagonalMatrixIndexer( const TridiagonalMatrixIndexer& indexer )
-      : rows( indexer.rows ), columns( indexer.columns ), size( indexer.size ) {};
+      : rows( indexer.rows ), columns( indexer.columns ), nonEmptyRows( indexer.nonEmptyRows ) {};
 
       void setDimensions( const IndexType& rows, const IndexType& columns )
       {
          this->rows = rows;
          this->columns = columns;
-         this->size = min( rows, columns );
+         this->nonEmptyRows = min( rows, columns ) + ( rows > columns );
       };
 
       __cuda_callable__
@@ -59,13 +59,15 @@ class TridiagonalMatrixIndexer
       };
 
       __cuda_callable__
-      IndexType getRows() const { return this->rows; };
+      const IndexType& getRows() const { return this->rows; };
 
       __cuda_callable__
-      IndexType getColumns() const { return this->rows; };
+      const IndexType& getColumns() const { return this->columns; };
 
       __cuda_callable__
-      IndexType getStorageSize() const { return 3 * this->size; };
+      const IndexType& getSize() const { return this->nonEmptyRows; };
+      __cuda_callable__
+      IndexType getStorageSize() const { return 3 * this->nonEmptyRows; };
 
       __cuda_callable__
       IndexType getGlobalIndex( const Index rowIdx, const Index localIdx ) const
@@ -78,12 +80,12 @@ class TridiagonalMatrixIndexer
          if( RowMajorOrder )
             return 3 * rowIdx + localIdx;
          else
-            return localIdx * size + rowIdx;
+            return localIdx * nonEmptyRows + rowIdx;
       };
 
       protected:
 
-         IndexType rows, columns, size;
+         IndexType rows, columns, nonEmptyRows;
 };
       } //namespace details
    } // namespace Materices
diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index 183783ea3..0f7158010 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -559,9 +559,9 @@ void test_SetRow()
    /*
     * Sets up the following 3x7 dense matrix:
     *
-    *    /  1  2  3  4  5  6  7 \
-    *    |  8  9 10 11 12 13 14 |
-    *    \ 15 16 17 18 19 20 21 /
+    *    / 11 11 11 11 11  6  7 \
+    *    | 22 22 22 22 22 13 14 |
+    *    \ 15 16 33 33 33 33 33 /
     */
    const IndexType rows = 3;
    const IndexType cols = 7;
diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
index 01ae4a518..abe6b64c5 100644
--- a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
+++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
@@ -1,7 +1,7 @@
 /***************************************************************************
                           MultidiagonalMatrixTest.h -  description
                              -------------------
-    begin                : Jan 9, 2020
+    begin                : Jan 8, 2020
     copyright            : (C) 2020 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
index 962f8c82d..dcd14302a 100644
--- a/src/UnitTests/Matrices/TridiagonalMatrixTest.h
+++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
@@ -46,42 +46,42 @@ void test_GetSerializationType()
 template< typename Matrix >
 void test_SetDimensions()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    const IndexType rows = 9;
-    const IndexType cols = 8;
+   const IndexType rows = 9;
+   const IndexType cols = 8;
 
-    Matrix m;
-    m.setDimensions( rows, cols );
+   Matrix m;
+   m.setDimensions( rows, cols );
 
-    EXPECT_EQ( m.getRows(), 9 );
-    EXPECT_EQ( m.getColumns(), 8 );
+   EXPECT_EQ( m.getRows(), 9 );
+   EXPECT_EQ( m.getColumns(), 8 );
 }
 
 template< typename Matrix1, typename Matrix2 >
 void test_SetLike()
 {
-    using RealType = typename Matrix1::RealType;
-    using DeviceType = typename Matrix1::DeviceType;
-    using IndexType = typename Matrix1::IndexType;
+   using RealType = typename Matrix1::RealType;
+   using DeviceType = typename Matrix1::DeviceType;
+   using IndexType = typename Matrix1::IndexType;
 
-    const IndexType rows = 8;
-    const IndexType cols = 7;
+   const IndexType rows = 8;
+   const IndexType cols = 7;
 
-    Matrix1 m1;
-    m1.reset();
-    m1.setDimensions( rows + 1, cols + 2 );
+   Matrix1 m1;
+   m1.reset();
+   m1.setDimensions( rows + 1, cols + 2 );
 
-    Matrix2 m2;
-    m2.reset();
-    m2.setDimensions( rows, cols );
+   Matrix2 m2;
+   m2.reset();
+   m2.setDimensions( rows, cols );
 
-    m1.setLike( m2 );
+   m1.setLike( m2 );
 
-    EXPECT_EQ( m1.getRows(), m2.getRows() );
-    EXPECT_EQ( m1.getColumns(), m2.getColumns() );
+   EXPECT_EQ( m1.getRows(), m2.getRows() );
+   EXPECT_EQ( m1.getColumns(), m2.getColumns() );
 }
 
 template< typename Matrix >
@@ -94,459 +94,464 @@ void test_GetCompressedRowLengths()
    const IndexType rows = 10;
    const IndexType cols = 11;
 
-    Matrix m( rows, cols );
+   Matrix m( rows, cols );
 
-    // Insert values into the rows.
-    RealType value = 1;
+   // Insert values into the rows.
+   RealType value = 1;
 
-    for( IndexType i = 0; i < 3; i++ )      // 0th row
-        m.setElement( 0, i, value++ );
+   for( IndexType i = 0; i < 2; i++ )  // 0th row -> 2 elements
+      m.setElement( 0, i, value++ );
 
-    for( IndexType i = 0; i < 3; i++ )      // 1st row
-        m.setElement( 1, i, value++ );
+   for( IndexType i = 0; i < 3; i++ )  // 1st row -> 3 elements
+      m.setElement( 1, i, value++ );
 
-    for( IndexType i = 0; i < 1; i++ )      // 2nd row
-        m.setElement( 2, i, value++ );
+   for( IndexType i = 1; i < 3; i++ )  // 2nd row -> 2 elements
+      m.setElement( 2, i, value++ );
 
-    for( IndexType i = 0; i < 2; i++ )      // 3rd row
-        m.setElement( 3, i, value++ );
+   for( IndexType i = 2; i < 5; i++ )  // 3rd row -> 3 elements
+      m.setElement( 3, i, value++ );
 
-    for( IndexType i = 0; i < 3; i++ )      // 4th row
-        m.setElement( 4, i, value++ );
+   for( IndexType i = 3; i < 6; i++ )  // 4th row -> 3 elements
+      m.setElement( 4, i, value++ );
 
-    for( IndexType i = 0; i < 4; i++ )      // 5th row
-        m.setElement( 5, i, value++ );
+   for( IndexType i = 4; i < 6; i++ )  // 5th row -> 2 elements
+      m.setElement( 5, i, value++ );
 
-    for( IndexType i = 0; i < 5; i++ )      // 6th row
-        m.setElement( 6, i, value++ );
+   for( IndexType i = 5; i < 8; i++ )  // 6th row -> 3 elements
+      m.setElement( 6, i, value++ );
 
-    for( IndexType i = 0; i < 6; i++ )      // 7th row
-        m.setElement( 7, i, value++ );
+   for( IndexType i = 6; i < 8; i++ )  // 7th row -> 2 elements
+      m.setElement( 7, i, value++ );
 
-    for( IndexType i = 0; i < 7; i++ )      // 8th row
-        m.setElement( 8, i, value++ );
+   for( IndexType i = 7; i < 10; i++ ) // 8th row -> 3 elements
+      m.setElement( 8, i, value++ );
 
-    for( IndexType i = 0; i < 8; i++ )      // 9th row
-        m.setElement( 9, i, value++ );
+   for( IndexType i = 8; i < 11; i++ ) // 9th row -> 3 elements
+      m.setElement( 9, i, value++ );
 
-   typename Matrix::CompressedRowLengthsVector rowLengths;
+   typename Matrix::CompressedRowLengthsVector rowLengths( rows );
    rowLengths = 0;
    m.getCompressedRowLengths( rowLengths );
-   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 };
+   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 2, 3, 2, 3, 3, 2, 3, 2, 3, 3 };
    EXPECT_EQ( rowLengths, correctRowLengths );
 }
 
 template< typename Matrix >
 void test_GetRowLength()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    const IndexType rows = 8;
-    const IndexType cols = 7;
+   const IndexType rows = 8;
+   const IndexType cols = 7;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   Matrix m( rows, cols );
 
-    EXPECT_EQ( m.getRowLength( 0 ), 7 );
-    EXPECT_EQ( m.getRowLength( 1 ), 7 );
-    EXPECT_EQ( m.getRowLength( 2 ), 7 );
-    EXPECT_EQ( m.getRowLength( 3 ), 7 );
-    EXPECT_EQ( m.getRowLength( 4 ), 7 );
-    EXPECT_EQ( m.getRowLength( 5 ), 7 );
-    EXPECT_EQ( m.getRowLength( 6 ), 7 );
-    EXPECT_EQ( m.getRowLength( 7 ), 7 );
+   EXPECT_EQ( m.getRowLength( 0 ), 2 );
+   EXPECT_EQ( m.getRowLength( 1 ), 3 );
+   EXPECT_EQ( m.getRowLength( 2 ), 3 );
+   EXPECT_EQ( m.getRowLength( 3 ), 3 );
+   EXPECT_EQ( m.getRowLength( 4 ), 3 );
+   EXPECT_EQ( m.getRowLength( 5 ), 3 );
+   EXPECT_EQ( m.getRowLength( 6 ), 2 );
+   EXPECT_EQ( m.getRowLength( 7 ), 1 );
 }
 
 template< typename Matrix >
-void test_GetNumberOfMatrixElements()
+void test_GetAllocatedElementsCount()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    const IndexType rows = 7;
-    const IndexType cols = 6;
+   const IndexType rows = 7;
+   const IndexType cols = 6;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   Matrix m( rows, cols );
 
-    EXPECT_EQ( m.getNumberOfMatrixElements(), 42 );
+   EXPECT_EQ( m.getAllocatedElementsCount(), 21 );
 }
 
 template< typename Matrix >
 void test_GetNumberOfNonzeroMatrixElements()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-/*
- * Sets up the following 7x6 dense matrix:
- *
- *    /  0  2  3  4  5  6 \
- *    |  7  8  9 10 11 12 |
- *    | 13 14 15 16 17 18 |
- *    | 19 20 21 22 23 24 |
- *    | 25 26 27 28 29 30 |
- *    | 31 32 33 34 35 36 |
- *    \ 37 38 39 40 41  0 /
- */
-    const IndexType rows = 7;
-    const IndexType cols = 6;
+   /*
+    * Sets up the following 7x6 dense matrix:
+    *
+    *    /  0  1  0  0  0  0 \
+    *    |  2  3  4  0  0  0 |
+    *    |  0  5  6  7  0  0 |
+    *    |  0  0  8  9 10  0 |
+    *    |  0  0  0 11 12 13 |
+    *    |  0  0  0  0 14  0 |
+    *    \  0  0  0  0  0 16 /
+    */
+   const IndexType rows = 7;
+   const IndexType cols = 6;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   Matrix m( rows, cols );
 
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++ )
-            m.setElement( i, j, value++ );
+   RealType value = 0;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( cols, i + 2 ); j++ )
+         m.setElement( i, j, value++ );
 
-    m.setElement( 0, 0, 0); // Set the first element of the diagonal to 0.
-    m.setElement( 6, 5, 0); // Set the last element of the diagonal to 0.
+   m.setElement( 5, 5, 0);
 
-    EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 40 );
+   EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 15 );
 }
 
 template< typename Matrix >
 void test_Reset()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-/*
- * Sets up the following 5x4 dense matrix:
- *
- *    /  0  0  0  0 \
- *    |  0  0  0  0 |
- *    |  0  0  0  0 |
- *    |  0  0  0  0 |
- *    \  0  0  0  0 /
- */
-    const IndexType rows = 5;
-    const IndexType cols = 4;
+   /*
+    * Sets up the following 5x4 dense matrix:
+    *
+    *    /  0  0  0  0 \
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    \  0  0  0  0 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
 
-    Matrix m;
-    m.setDimensions( rows, cols );
+   Matrix m( rows, cols );
 
-    m.reset();
+   m.reset();
 
-    EXPECT_EQ( m.getRows(), 0 );
-    EXPECT_EQ( m.getColumns(), 0 );
+   EXPECT_EQ( m.getRows(), 0 );
+   EXPECT_EQ( m.getColumns(), 0 );
 }
 
 template< typename Matrix >
 void test_SetValue()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 7x6 dense matrix:
- *
- *    /  1  2  3  4  5  6 \
- *    |  7  8  9 10 11 12 |
- *    | 13 14 15 16 17 18 |
- *    | 19 20 21 22 23 24 |
- *    | 25 26 27 28 29 30 |
- *    | 31 32 33 34 35 36 |
- *    \ 37 38 39 40 41 42 /
- */
-    const IndexType rows = 7;
-    const IndexType cols = 6;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   /*
+    * Sets up the following 7x6 dense matrix:
+    *
+    *    /  0  1  0  0  0  0 \
+    *    |  2  3  4  0  0  0 |
+    *    |  0  5  6  7  0  0 |
+    *    |  0  0  8  9 10  0 |
+    *    |  0  0  0 11 12 13 |
+    *    |  0  0  0  0 14  0 |
+    *    \  0  0  0  0  0 16 /
+    */
+   const IndexType rows = 7;
+   const IndexType cols = 6;
 
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++ )
-            m.setElement( i, j, value++ );
+   Matrix m( rows, cols );
+
+   RealType value = 0;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( cols, i + 2 ); j++ )
+         m.setElement( i, j, value++ );
 
-    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
-    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
-    EXPECT_EQ( m.getElement( 0, 2 ),  3 );
-    EXPECT_EQ( m.getElement( 0, 3 ),  4 );
-    EXPECT_EQ( m.getElement( 0, 4 ),  5 );
-    EXPECT_EQ( m.getElement( 0, 5 ),  6 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ),  7 );
-    EXPECT_EQ( m.getElement( 1, 1 ),  8 );
-    EXPECT_EQ( m.getElement( 1, 2 ),  9 );
-    EXPECT_EQ( m.getElement( 1, 3 ), 10 );
-    EXPECT_EQ( m.getElement( 1, 4 ), 11 );
-    EXPECT_EQ( m.getElement( 1, 5 ), 12 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ), 13 );
-    EXPECT_EQ( m.getElement( 2, 1 ), 14 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 15 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 16 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 17 );
-    EXPECT_EQ( m.getElement( 2, 5 ), 18 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ), 19 );
-    EXPECT_EQ( m.getElement( 3, 1 ), 20 );
-    EXPECT_EQ( m.getElement( 3, 2 ), 21 );
-    EXPECT_EQ( m.getElement( 3, 3 ), 22 );
-    EXPECT_EQ( m.getElement( 3, 4 ), 23 );
-    EXPECT_EQ( m.getElement( 3, 5 ), 24 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ), 25 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 26 );
-    EXPECT_EQ( m.getElement( 4, 2 ), 27 );
-    EXPECT_EQ( m.getElement( 4, 3 ), 28 );
-    EXPECT_EQ( m.getElement( 4, 4 ), 29 );
-    EXPECT_EQ( m.getElement( 4, 5 ), 30 );
-
-    EXPECT_EQ( m.getElement( 5, 0 ), 31 );
-    EXPECT_EQ( m.getElement( 5, 1 ), 32 );
-    EXPECT_EQ( m.getElement( 5, 2 ), 33 );
-    EXPECT_EQ( m.getElement( 5, 3 ), 34 );
-    EXPECT_EQ( m.getElement( 5, 4 ), 35 );
-    EXPECT_EQ( m.getElement( 5, 5 ), 36 );
-
-    EXPECT_EQ( m.getElement( 6, 0 ), 37 );
-    EXPECT_EQ( m.getElement( 6, 1 ), 38 );
-    EXPECT_EQ( m.getElement( 6, 2 ), 39 );
-    EXPECT_EQ( m.getElement( 6, 3 ), 40 );
-    EXPECT_EQ( m.getElement( 6, 4 ), 41 );
-    EXPECT_EQ( m.getElement( 6, 5 ), 42 );
-
-    // Set the values of all elements to a certain number
-    m.setValue( 42 );
-
-    EXPECT_EQ( m.getElement( 0, 0 ), 42 );
-    EXPECT_EQ( m.getElement( 0, 1 ), 42 );
-    EXPECT_EQ( m.getElement( 0, 2 ), 42 );
-    EXPECT_EQ( m.getElement( 0, 3 ), 42 );
-    EXPECT_EQ( m.getElement( 0, 4 ), 42 );
-    EXPECT_EQ( m.getElement( 0, 5 ), 42 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ), 42 );
-    EXPECT_EQ( m.getElement( 1, 1 ), 42 );
-    EXPECT_EQ( m.getElement( 1, 2 ), 42 );
-    EXPECT_EQ( m.getElement( 1, 3 ), 42 );
-    EXPECT_EQ( m.getElement( 1, 4 ), 42 );
-    EXPECT_EQ( m.getElement( 1, 5 ), 42 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ), 42 );
-    EXPECT_EQ( m.getElement( 2, 1 ), 42 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 42 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 42 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 42 );
-    EXPECT_EQ( m.getElement( 2, 5 ), 42 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ), 42 );
-    EXPECT_EQ( m.getElement( 3, 1 ), 42 );
-    EXPECT_EQ( m.getElement( 3, 2 ), 42 );
-    EXPECT_EQ( m.getElement( 3, 3 ), 42 );
-    EXPECT_EQ( m.getElement( 3, 4 ), 42 );
-    EXPECT_EQ( m.getElement( 3, 5 ), 42 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ), 42 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 42 );
-    EXPECT_EQ( m.getElement( 4, 2 ), 42 );
-    EXPECT_EQ( m.getElement( 4, 3 ), 42 );
-    EXPECT_EQ( m.getElement( 4, 4 ), 42 );
-    EXPECT_EQ( m.getElement( 4, 5 ), 42 );
-
-    EXPECT_EQ( m.getElement( 5, 0 ), 42 );
-    EXPECT_EQ( m.getElement( 5, 1 ), 42 );
-    EXPECT_EQ( m.getElement( 5, 2 ), 42 );
-    EXPECT_EQ( m.getElement( 5, 3 ), 42 );
-    EXPECT_EQ( m.getElement( 5, 4 ), 42 );
-    EXPECT_EQ( m.getElement( 5, 5 ), 42 );
-
-    EXPECT_EQ( m.getElement( 6, 0 ), 42 );
-    EXPECT_EQ( m.getElement( 6, 1 ), 42 );
-    EXPECT_EQ( m.getElement( 6, 2 ), 42 );
-    EXPECT_EQ( m.getElement( 6, 3 ), 42 );
-    EXPECT_EQ( m.getElement( 6, 4 ), 42 );
-    EXPECT_EQ( m.getElement( 6, 5 ), 42 );
+   m.setElement( 5, 5, 0);
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  2 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  3 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  5 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  9 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 11 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 12 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 13 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 14 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 16 );
+
+   // Set the values of all elements to a certain number
+   m.setValue( 42 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ), 42 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 42 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 42 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 42 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 42 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 42 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 42 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 42 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 42 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 42 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 42 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 42 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 42 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 42 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 42 );
+   EXPECT_EQ( m.getElement( 5, 5 ), 42 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 42 );
 }
 
 template< typename Matrix >
 void test_SetElement()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 5x5 dense matrix:
- *
- *    /  1  2  3  4  5 \
- *    |  6  7  8  9 10 |
- *    | 11 12 13 14 15 |
- *    | 16 17 18 19 20 |
- *    \ 21 22 23 24 25 /
- */
-    const IndexType rows = 5;
-    const IndexType cols = 5;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   /*
+    * Sets up the following 5x5 dense matrix:
+    *
+    *    /  1  2  0  0  0 \
+    *    |  6  7  8  0  0 |
+    *    |  0 12 13 14  0 |
+    *    |  0  0 18 19 20 |
+    *    \  0  0  0 24 25 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 5;
 
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++ )
+   Matrix m( rows, cols );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+      {
+         if( abs( i - j ) > 1 )
+         {
+            EXPECT_THROW( m.setElement( i, j, value++ ), std::logic_error );
+         }
+         else
             m.setElement( i, j, value++ );
+      }
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 20 );
 
-    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
-    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
-    EXPECT_EQ( m.getElement( 0, 2 ),  3 );
-    EXPECT_EQ( m.getElement( 0, 3 ),  4 );
-    EXPECT_EQ( m.getElement( 0, 4 ),  5 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ),  6 );
-    EXPECT_EQ( m.getElement( 1, 1 ),  7 );
-    EXPECT_EQ( m.getElement( 1, 2 ),  8 );
-    EXPECT_EQ( m.getElement( 1, 3 ),  9 );
-    EXPECT_EQ( m.getElement( 1, 4 ), 10 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ), 11 );
-    EXPECT_EQ( m.getElement( 2, 1 ), 12 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 13 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 14 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 15 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ), 16 );
-    EXPECT_EQ( m.getElement( 3, 1 ), 17 );
-    EXPECT_EQ( m.getElement( 3, 2 ), 18 );
-    EXPECT_EQ( m.getElement( 3, 3 ), 19 );
-    EXPECT_EQ( m.getElement( 3, 4 ), 20 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ), 21 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 22 );
-    EXPECT_EQ( m.getElement( 4, 2 ), 23 );
-    EXPECT_EQ( m.getElement( 4, 3 ), 24 );
-    EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 25 );
 }
 
 template< typename Matrix >
 void test_AddElement()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 6x5 dense matrix:
- *
- *    /  1  2  3  4  5 \
- *    |  6  7  8  9 10 |
- *    | 11 12 13 14 15 |
- *    | 16 17 18 19 20 |
- *    | 21 22 23 24 25 |
- *    \ 26 27 28 29 30 /
- */
-    const IndexType rows = 6;
-    const IndexType cols = 5;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   /*
+    * Sets up the following 6x5 dense matrix:
+    *
+    *    /  1  2  0  0  0 \
+    *    |  6  7  8  0  0 |
+    *    |  0 12 13 14  0 |
+    *    |  0  0 18 19 20 |
+    *    |  0  0  0 24 25 |
+    *    \  0  0  0  0 30 /
+    */
+
+   const IndexType rows = 6;
+   const IndexType cols = 5;
+
+   Matrix m( rows, cols );
 
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++ )
-            m.setElement( i, j, value++ );
+        {
+           if( abs( i - j ) <= 1 )
+               m.setElement( i, j, value );
+           value++;
+        }
 
-    // Check the added elements
-    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
-    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
-    EXPECT_EQ( m.getElement( 0, 2 ),  3 );
-    EXPECT_EQ( m.getElement( 0, 3 ),  4 );
-    EXPECT_EQ( m.getElement( 0, 4 ),  5 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ),  6 );
-    EXPECT_EQ( m.getElement( 1, 1 ),  7 );
-    EXPECT_EQ( m.getElement( 1, 2 ),  8 );
-    EXPECT_EQ( m.getElement( 1, 3 ),  9 );
-    EXPECT_EQ( m.getElement( 1, 4 ), 10 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ), 11 );
-    EXPECT_EQ( m.getElement( 2, 1 ), 12 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 13 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 14 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 15 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ), 16 );
-    EXPECT_EQ( m.getElement( 3, 1 ), 17 );
-    EXPECT_EQ( m.getElement( 3, 2 ), 18 );
-    EXPECT_EQ( m.getElement( 3, 3 ), 19 );
-    EXPECT_EQ( m.getElement( 3, 4 ), 20 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ), 21 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 22 );
-    EXPECT_EQ( m.getElement( 4, 2 ), 23 );
-    EXPECT_EQ( m.getElement( 4, 3 ), 24 );
-    EXPECT_EQ( m.getElement( 4, 4 ), 25 );
-
-    EXPECT_EQ( m.getElement( 5, 0 ), 26 );
-    EXPECT_EQ( m.getElement( 5, 1 ), 27 );
-    EXPECT_EQ( m.getElement( 5, 2 ), 28 );
-    EXPECT_EQ( m.getElement( 5, 3 ), 29 );
-    EXPECT_EQ( m.getElement( 5, 4 ), 30 );
-
-    // Add new elements to the old elements with a multiplying factor applied to the old elements.
-/*
- * The following setup results in the following 6x5 dense matrix:
- *
- *    /  3  6  9 12 15 \
- *    | 18 21 24 27 30 |
- *    | 33 36 39 42 45 |
- *    | 48 51 54 57 60 |
- *    | 63 66 69 72 75 |
- *    \ 78 81 84 87 90 /
- */
-    RealType newValue = 1;
-    RealType multiplicator = 2;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++ )
+   // Check the added elements
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 20 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 30 );
+
+   // Add new elements to the old elements with a multiplying factor applied to the old elements.
+   /*
+    * The following setup results in the following 6x5 dense matrix:
+    *
+    *     /  1  2  0  0  0 \    /  1  2  0  0  0 \   /  3  6  0  0  0 \
+    *     |  6  7  8  0  0 |    |  3  4  5  0  0 |   | 15 18 21  0  0 |
+    * 2 * |  0 12 13 14  0 |  + |  0  6  7  8  0 | = |  0 30 33 36  0 |
+    *     |  0  0 18 19 20 |    |  0  0  9 10 11 |   |  0  0 45 48 51 |
+    *     |  0  0  0 24 25 |    |  0  0  0 12 13 |   |  0  0  0 60 63 |
+    *     \  0  0  0  0 30 /    \  0  0  0  0 14 /   \  0  0  0  0 74 /
+    */
+
+   RealType newValue = 1;
+   RealType multiplicator = 2;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+         if( abs( i - j ) <= 1 )
             m.addElement( i, j, newValue++, multiplicator );
 
-    EXPECT_EQ( m.getElement( 0, 0 ),  3 );
-    EXPECT_EQ( m.getElement( 0, 1 ),  6 );
-    EXPECT_EQ( m.getElement( 0, 2 ),  9 );
-    EXPECT_EQ( m.getElement( 0, 3 ), 12 );
-    EXPECT_EQ( m.getElement( 0, 4 ), 15 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ), 18 );
-    EXPECT_EQ( m.getElement( 1, 1 ), 21 );
-    EXPECT_EQ( m.getElement( 1, 2 ), 24 );
-    EXPECT_EQ( m.getElement( 1, 3 ), 27 );
-    EXPECT_EQ( m.getElement( 1, 4 ), 30 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ), 33 );
-    EXPECT_EQ( m.getElement( 2, 1 ), 36 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 39 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 42 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 45 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ), 48 );
-    EXPECT_EQ( m.getElement( 3, 1 ), 51 );
-    EXPECT_EQ( m.getElement( 3, 2 ), 54 );
-    EXPECT_EQ( m.getElement( 3, 3 ), 57 );
-    EXPECT_EQ( m.getElement( 3, 4 ), 60 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ), 63 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 66 );
-    EXPECT_EQ( m.getElement( 4, 2 ), 69 );
-    EXPECT_EQ( m.getElement( 4, 3 ), 72 );
-    EXPECT_EQ( m.getElement( 4, 4 ), 75 );
-
-    EXPECT_EQ( m.getElement( 5, 0 ), 78 );
-    EXPECT_EQ( m.getElement( 5, 1 ), 81 );
-    EXPECT_EQ( m.getElement( 5, 2 ), 84 );
-    EXPECT_EQ( m.getElement( 5, 3 ), 87 );
-    EXPECT_EQ( m.getElement( 5, 4 ), 90 );
+   EXPECT_EQ( m.getElement( 0, 0 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 15 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 18 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 21 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 30 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 36 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 45  );
+   EXPECT_EQ( m.getElement( 3, 3 ), 48 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 51 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 60 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 63 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 74 );
 }
 
 template< typename Matrix >
@@ -559,61 +564,54 @@ void test_SetRow()
    /*
     * Sets up the following 3x7 dense matrix:
     *
-    *    /  1  2  3  4  5  6  7 \
-    *    |  8  9 10 11 12 13 14 |
-    *    \ 15 16 17 18 19 20 21 /
+    *    /  1  2  0  0  0  0  0 \
+    *    |  8  9 10  0  0  0  0 |
+    *    \  0 16 17 18  0  0  0 /
     */
    const IndexType rows = 3;
    const IndexType cols = 7;
 
-   Matrix m;
-   m.reset();
-   m.setDimensions( rows, cols );
-
-   RealType value = 1;
-   for( IndexType i = 0; i < rows; i++ )
-      for( IndexType j = 0; j < cols; j++ )
-         m.setElement( i, j, value++ );
+   Matrix m( rows, cols );
 
    auto matrix_view = m.getView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      RealType values[ 3 ][ 5 ] {
-         { 11, 11, 11, 11, 11 },
-         { 22, 22, 22, 22, 22 },
-         { 33, 33, 33, 33, 33 } };
-      IndexType columnIndexes[ 3 ][ 5 ] {
-         { 0, 1, 2, 3, 4 },
-         { 0, 1, 2, 3, 4 },
-         { 2, 3, 4, 5, 6 } };
+      RealType values[ 3 ][ 3 ] {
+         {  1,  2,  0 },
+         {  8,  9, 10 },
+         { 16, 17, 18 } };
       auto row = matrix_view.getRow( rowIdx );
-      for( IndexType i = 0; i < 5; i++ )
-        row.setElement( i, values[ rowIdx ][ i ] );
+      for( IndexType i = 0; i < 3; i++ )
+      {
+         if( rowIdx == 0 && i > 1 )
+            break;
+         row.setElement( i, values[ rowIdx ][ i ] );
+      }
    };
    TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f );
 
-   EXPECT_EQ( m.getElement( 0, 0 ), 11 );
-   EXPECT_EQ( m.getElement( 0, 1 ), 11 );
-   EXPECT_EQ( m.getElement( 0, 2 ), 11 );
-   EXPECT_EQ( m.getElement( 0, 3 ), 11 );
-   EXPECT_EQ( m.getElement( 0, 4 ), 11 );
-   EXPECT_EQ( m.getElement( 0, 5 ),  6 );
-   EXPECT_EQ( m.getElement( 0, 6 ),  7 );
-
-   EXPECT_EQ( m.getElement( 1, 0 ), 22 );
-   EXPECT_EQ( m.getElement( 1, 1 ), 22 );
-   EXPECT_EQ( m.getElement( 1, 2 ), 22 );
-   EXPECT_EQ( m.getElement( 1, 3 ), 22 );
-   EXPECT_EQ( m.getElement( 1, 4 ), 22 );
-   EXPECT_EQ( m.getElement( 1, 5 ), 13 );
-   EXPECT_EQ( m.getElement( 1, 6 ), 14 );
-
-   EXPECT_EQ( m.getElement( 2, 0 ), 15 );
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 6 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  8 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  9 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 6 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
    EXPECT_EQ( m.getElement( 2, 1 ), 16 );
-   EXPECT_EQ( m.getElement( 2, 2 ), 33 );
-   EXPECT_EQ( m.getElement( 2, 3 ), 33 );
-   EXPECT_EQ( m.getElement( 2, 4 ), 33 );
-   EXPECT_EQ( m.getElement( 2, 5 ), 33 );
-   EXPECT_EQ( m.getElement( 2, 6 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 17 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 18 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 6 ),  0 );
 }
 
 template< typename Matrix >
@@ -625,12 +623,12 @@ void test_AddRow()
    /*
     * Sets up the following 6x5 dense matrix:
     *
-    *    /  1  2  3  4  5 \
-    *    |  6  7  8  9 10 |
-    *    | 11 12 13 14 15 |
-    *    | 16 17 18 19 20 |
-    *    | 21 22 23 24 25 |
-    *    \ 26 27 28 29 30 /
+    *    /  1  2  0  0  0 \
+    *    |  6  7  8  0  0 |
+    *    |  0 12 13 14  0 |
+    *    |  0  0 18 19 20 |
+    *    |  0  0  0 24 25 |
+    *    \  0  0  0  0 30 /
     */
 
    const IndexType rows = 6;
@@ -641,68 +639,72 @@ void test_AddRow()
    RealType value = 1;
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < cols; j++ )
-         m.setElement( i, j, value++ );
+      {
+         if( abs( i - j ) <= 1 )
+            m.setElement( i, j, value );
+         value++;
+      }
 
    // Check the added elements
    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
-   EXPECT_EQ( m.getElement( 0, 2 ),  3 );
-   EXPECT_EQ( m.getElement( 0, 3 ),  4 );
-   EXPECT_EQ( m.getElement( 0, 4 ),  5 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
 
    EXPECT_EQ( m.getElement( 1, 0 ),  6 );
    EXPECT_EQ( m.getElement( 1, 1 ),  7 );
    EXPECT_EQ( m.getElement( 1, 2 ),  8 );
-   EXPECT_EQ( m.getElement( 1, 3 ),  9 );
-   EXPECT_EQ( m.getElement( 1, 4 ), 10 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
 
-   EXPECT_EQ( m.getElement( 2, 0 ), 11 );
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
    EXPECT_EQ( m.getElement( 2, 1 ), 12 );
    EXPECT_EQ( m.getElement( 2, 2 ), 13 );
    EXPECT_EQ( m.getElement( 2, 3 ), 14 );
-   EXPECT_EQ( m.getElement( 2, 4 ), 15 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
 
-   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
-   EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
    EXPECT_EQ( m.getElement( 3, 2 ), 18 );
    EXPECT_EQ( m.getElement( 3, 3 ), 19 );
    EXPECT_EQ( m.getElement( 3, 4 ), 20 );
 
-   EXPECT_EQ( m.getElement( 4, 0 ), 21 );
-   EXPECT_EQ( m.getElement( 4, 1 ), 22 );
-   EXPECT_EQ( m.getElement( 4, 2 ), 23 );
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
    EXPECT_EQ( m.getElement( 4, 3 ), 24 );
    EXPECT_EQ( m.getElement( 4, 4 ), 25 );
 
-   EXPECT_EQ( m.getElement( 5, 0 ), 26 );
-   EXPECT_EQ( m.getElement( 5, 1 ), 27 );
-   EXPECT_EQ( m.getElement( 5, 2 ), 28 );
-   EXPECT_EQ( m.getElement( 5, 3 ), 29 );
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
    EXPECT_EQ( m.getElement( 5, 4 ), 30 );
 
    // Add new elements to the old elements with a multiplying factor applied to the old elements.
    /*
     * The following setup results in the following 6x5 sparse matrix:
     *
-    *    /  3  6  9 12 15 \
-    *    | 18 21 24 27 30 |
-    *    | 33 36 39 42 45 |
-    *    | 48 51 54 57 60 |
-    *    | 63 66 69 72 75 |
-    *    \ 78 81 84 87 90 /
+    *  / 0  0  0  0  0  0 \   /  1  2  0  0  0 \   / 11 11  0  0  0 \   / 11  11  0   0   0 \
+    *  | 0  1  0  0  0  0 |   |  6  7  8  0  0 |   | 22 22 22  0  0 |   | 28  29 30   0   0 |
+    *  | 0  0  2  0  0  0 | * |  0 12 13 14  0 | + |  0 33 33 33  0 | = |  0  57 59  61   0 |
+    *  | 0  0  0  3  0  0 |   |  0  0 18 19 20 |   |  0  0 44 44 44 |   |  0   0 98 101 104 |
+    *  | 0  0  0  0  4  0 |   |  0  0  0 24 25 |   |  0  0  0 55 55 |   |  0   0  0 151 155 |
+    *  \ 0  0  0  0  0  5 /   \  0  0  0  0 30 /   \  0  0  0  0 66 /   \  0   0  0   0 216 /
     */
 
    auto matrix_view = m.getView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      RealType values[ 6 ][ 5 ] {
-         { 11, 11, 11, 11, 0 },
-         { 22, 22, 22, 22, 0 },
-         { 33, 33, 33, 33, 0 },
-         { 44, 44, 44, 44, 0 },
-         { 55, 55, 55, 55, 0 },
-         { 66, 66, 66, 66, 0 } };
+      RealType values[ 6 ][ 3 ] {
+         { 11, 11,  0 },
+         { 22, 22, 22 },
+         { 33, 33, 33 },
+         { 44, 44, 44 },
+         { 55, 55, 55 },
+         { 66, 66, 66 } };
       auto row = matrix_view.getRow( rowIdx );
-      for( IndexType i = 0; i < 5; i++ )
+      for( IndexType i = 0; i < 3; i++ )
       {
          RealType& val = row.getValue( i );
          val = rowIdx * val + values[ rowIdx ][ i ];
@@ -711,90 +713,86 @@ void test_AddRow()
    TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f );
 
 
-    EXPECT_EQ( m.getElement( 0, 0 ),  11 );
-    EXPECT_EQ( m.getElement( 0, 1 ),  11 );
-    EXPECT_EQ( m.getElement( 0, 2 ),  11 );
-    EXPECT_EQ( m.getElement( 0, 3 ),  11 );
-    EXPECT_EQ( m.getElement( 0, 4 ),   0 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ),  28 );
-    EXPECT_EQ( m.getElement( 1, 1 ),  29 );
-    EXPECT_EQ( m.getElement( 1, 2 ),  30 );
-    EXPECT_EQ( m.getElement( 1, 3 ),  31 );
-    EXPECT_EQ( m.getElement( 1, 4 ),  10 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ),  55 );
-    EXPECT_EQ( m.getElement( 2, 1 ),  57 );
-    EXPECT_EQ( m.getElement( 2, 2 ),  59 );
-    EXPECT_EQ( m.getElement( 2, 3 ),  61 );
-    EXPECT_EQ( m.getElement( 2, 4 ),  30 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ),  92 );
-    EXPECT_EQ( m.getElement( 3, 1 ),  95 );
-    EXPECT_EQ( m.getElement( 3, 2 ),  98 );
-    EXPECT_EQ( m.getElement( 3, 3 ), 101 );
-    EXPECT_EQ( m.getElement( 3, 4 ),  60 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ), 139 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 143 );
-    EXPECT_EQ( m.getElement( 4, 2 ), 147 );
-    EXPECT_EQ( m.getElement( 4, 3 ), 151 );
-    EXPECT_EQ( m.getElement( 4, 4 ), 100 );
-
-    EXPECT_EQ( m.getElement( 5, 0 ), 196 );
-    EXPECT_EQ( m.getElement( 5, 1 ), 201 );
-    EXPECT_EQ( m.getElement( 5, 2 ), 206 );
-    EXPECT_EQ( m.getElement( 5, 3 ), 211 );
-    EXPECT_EQ( m.getElement( 5, 4 ), 150 );
+   EXPECT_EQ( m.getElement( 0, 0 ),  11 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  11 );
+   EXPECT_EQ( m.getElement( 0, 2 ),   0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),   0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),   0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  28 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  29 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  30 );
+   EXPECT_EQ( m.getElement( 1, 3 ),   0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),   0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),   0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  57 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  59 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  61 );
+   EXPECT_EQ( m.getElement( 2, 4 ),   0  );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),   0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),   0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  98 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 101 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 104 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),   0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),   0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),   0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 151 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 155 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),   0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),   0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),   0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),   0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 216 );
 }
 
 template< typename Matrix >
 void test_VectorProduct()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 5x4 dense matrix:
- *
- *    /  1  2  3  4 \
- *    |  5  6  7  8 |
- *    |  9 10 11 12 |
- *    | 13 14 15 16 |
- *    \ 17 18 19 20 /
- */
-    const IndexType rows = 5;
-    const IndexType cols = 4;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   /*
+    * Sets up the following 5x4 dense matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  5  6  7  0 |
+    *    |  0 10 11 12 |
+    *    |  0  0 15 16 |
+    *    \  0  0  0 20 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
 
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++)
-            m.setElement( i, j, value++ );
+   Matrix m( rows, cols );
 
-    using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++)
+         if( abs( i - j ) <= 1 )
+            m.setElement( i, j, value++ );
 
-    VectorType inVector;
-    inVector.setSize( 4 );
-    for( IndexType i = 0; i < inVector.getSize(); i++ )
-        inVector.setElement( i, 2 );
+   using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
 
-    VectorType outVector;
-    outVector.setSize( 5 );
-    for( IndexType j = 0; j < outVector.getSize(); j++ )
-        outVector.setElement( j, 0 );
+   VectorType inVector( 4 );
+   inVector = 2;
 
+   VectorType outVector( 5 );
+   outVector = 0;
 
-    m.vectorProduct( inVector, outVector);
+   m.vectorProduct( inVector, outVector);
 
-    EXPECT_EQ( outVector.getElement( 0 ),  20 );
-    EXPECT_EQ( outVector.getElement( 1 ),  52 );
-    EXPECT_EQ( outVector.getElement( 2 ),  84 );
-    EXPECT_EQ( outVector.getElement( 3 ), 116 );
-    EXPECT_EQ( outVector.getElement( 4 ), 148 );
+   std::cerr << outVector << std::endl;
+   EXPECT_EQ( outVector.getElement( 0 ),  6 );
+   EXPECT_EQ( outVector.getElement( 1 ), 36 );
+   EXPECT_EQ( outVector.getElement( 2 ), 66 );
+   EXPECT_EQ( outVector.getElement( 3 ), 62 );
+   EXPECT_EQ( outVector.getElement( 4 ), 40 );
 }
 
 template< typename Matrix >
@@ -1388,6 +1386,13 @@ TYPED_TEST( MatrixTest, setLikeTest )
     test_SetLike< MatrixType, MatrixType >();
 }
 
+TYPED_TEST( MatrixTest, getCompressedRowLengthTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetCompressedRowLengths< MatrixType >();
+}
+
 TYPED_TEST( MatrixTest, getRowLengthTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
@@ -1395,11 +1400,11 @@ TYPED_TEST( MatrixTest, getRowLengthTest )
     test_GetRowLength< MatrixType >();
 }
 
-TYPED_TEST( MatrixTest, getNumberOfMatrixElementsTest )
+TYPED_TEST( MatrixTest, getAllocatedElementsCountTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
 
-    test_GetNumberOfMatrixElements< MatrixType >();
+    test_GetAllocatedElementsCount< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest )
-- 
GitLab


From 7eb350bf9e4735b0c2e4653f91963360fbe5e97a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sat, 11 Jan 2020 13:46:45 +0100
Subject: [PATCH 083/179] Fixing Tridiagonal matrix unit tests.

---
 src/TNL/Matrices/Tridiagonal.h                |  10 +-
 src/TNL/Matrices/Tridiagonal.hpp              | 251 +++------
 src/TNL/Matrices/TridiagonalMatrixView.h      |   6 +
 src/TNL/Matrices/TridiagonalMatrixView.hpp    | 183 ++++---
 .../details/TridiagonalMatrixIndexer.h        |   2 +-
 src/UnitTests/Matrices/CMakeLists.txt         |  12 +-
 .../Matrices/TridiagonalMatrixTest.h          | 478 +++++++++---------
 7 files changed, 430 insertions(+), 512 deletions(-)

diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h
index d28270156..e7e3ab6b2 100644
--- a/src/TNL/Matrices/Tridiagonal.h
+++ b/src/TNL/Matrices/Tridiagonal.h
@@ -50,13 +50,15 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
                 typename _Index = Index >
       using Self = Tridiagonal< _Real, _Device, _Index >;
 
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; };
+
       Tridiagonal();
 
       Tridiagonal( const IndexType rows, const IndexType columns );
 
-      ViewType getView();
+      ViewType getView() const; // TODO: remove const
 
-      ConstViewType getConstView() const;
+      //ConstViewType getConstView() const;
 
       static String getSerializationType();
 
@@ -168,6 +170,10 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
 
       void print( std::ostream& str ) const;
 
+      const IndexerType& getIndexer() const;
+
+      IndexerType& getIndexer();
+
    protected:
 
       __cuda_callable__
diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp
index c6d359d3b..6c09238ff 100644
--- a/src/TNL/Matrices/Tridiagonal.hpp
+++ b/src/TNL/Matrices/Tridiagonal.hpp
@@ -49,12 +49,13 @@ template< typename Real,
           typename RealAllocator >
 auto
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
-getView() -> ViewType
+getView() const -> ViewType
 {
-   return ViewType( this->values.getView(), indexer );
+   // TODO: fix when getConstView works
+   return ViewType( const_cast< Tridiagonal* >( this )->values.getView(), indexer );
 }
 
-template< typename Real,
+/*template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder,
@@ -64,7 +65,7 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getConstView() const -> ConstViewType
 {
    return ConstViewType( this->values.getConstView(), indexer );
-}
+}*/
 
 template< typename Real,
           typename Device,
@@ -146,19 +147,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getCompressedRowLengths( Vector& rowLengths ) const
 {
    return this->view.getCompressedRowLengths( rowLengths );
-   /*rowLengths.setSize( this->getRows() );
-   rowLengths = 0;
-   auto rowLengths_view = rowLengths.getView();
-   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
-      return ( value != 0.0 );
-   };
-   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
-      aux += a;
-   };
-   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
-      rowLengths_view[ rowIdx ] = value;
-   };
-   this->allRowsReduction( fetch, reduce, keep, 0 );*/
 }
 
 template< typename Real,
@@ -171,7 +159,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getRowLength( const IndexType row ) const
 {
    return this->view.getRowLength( row );
-   //return this->indexer.getRowSize( row );
 }
 
 template< typename Real,
@@ -209,11 +196,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getNumberOfNonzeroMatrixElements() const
 {
    return this->view.getNumberOfNonzeroMatrixElements();
-   /*const auto values_view = this->values.getConstView();
-   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
-      return ( values_view[ i ] != 0.0 );
-   };
-   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );*/
 }
 
 template< typename Real,
@@ -283,7 +265,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getRow( const IndexType& rowIdx ) const -> const RowView
 {
    return this->view.getRow( rowIdx );
-   //return RowView( this->values.getView(), this->indexer );
 }
 
 template< typename Real,
@@ -297,7 +278,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getRow( const IndexType& rowIdx ) -> RowView
 {
    return this->view.getRow( rowIdx );
-   //return RowView( this->values.getView(), this->indexer );
 }
 
 template< typename Real,
@@ -310,18 +290,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 setElement( const IndexType row, const IndexType column, const RealType& value )
 {
    return this->view.setElement( row, column, value );
-   /*TNL_ASSERT_GE( row, 0, "" );
-   TNL_ASSERT_LT( row, this->getRows(), "" );
-   TNL_ASSERT_GE( column, 0, "" );
-   TNL_ASSERT_LT( column, this->getColumns(), "" );
-   if( abs( row - column ) > 1 )
-   {
-      std::stringstream msg;
-      msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in tridiagonal matrix.";
-      throw std::logic_error( msg.str() );
-   }
-   this->values.setElement( this->getElementIndex( row, column ), value );
-   return true;*/
 }
 
 template< typename Real,
@@ -337,19 +305,6 @@ addElement( const IndexType row,
             const RealType& thisElementMultiplicator )
 {
    return this->view.addElement( row, column, value, thisElementMultiplicator );
-   /*TNL_ASSERT_GE( row, 0, "" );
-   TNL_ASSERT_LT( row, this->getRows(), "" );
-   TNL_ASSERT_GE( column, 0, "" );
-   TNL_ASSERT_LT( column, this->getColumns(), "" );
-   if( abs( row - column ) > 1 )
-   {
-      std::stringstream msg;
-      msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in tridiagonal matrix.";
-      throw std::logic_error( msg.str() );
-   }
-   const Index i = this->getElementIndex( row, column );
-   this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value );
-   return true;*/
 }
 
 template< typename Real,
@@ -362,14 +317,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getElement( const IndexType row, const IndexType column ) const
 {
    return this->view.getElement( row, column );
-   /*TNL_ASSERT_GE( row, 0, "" );
-   TNL_ASSERT_LT( row, this->getRows(), "" );
-   TNL_ASSERT_GE( column, 0, "" );
-   TNL_ASSERT_LT( column, this->getColumns(), "" );
-
-   if( abs( column - row ) > 1 )
-      return 0.0;
-   return this->values.getElement( this->getElementIndex( row, column ) );*/
 }
 
 template< typename Real,
@@ -383,39 +330,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
    this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
-   /*using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) );
-   const auto values_view = this->values.getConstView();
-   const auto indexer = this->indexer;
-   const auto zero = zero_;
-   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      Real_ sum( zero );
-      if( rowIdx == 0 )
-      {
-         reduce( sum, fetch( 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ) );
-         reduce( sum, fetch( 0, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ) );
-         keep( 0, sum );
-         return;
-      }
-      if( rowIdx < indexer.getSize() || indexer.getColumns() > indexer.getRows() )
-      {
-         reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
-         reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
-         reduce( sum, fetch( rowIdx, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ) );
-         keep( rowIdx, sum );
-         return;
-      }
-      if( indexer.getRows() == indexer.getColumns() )
-      {
-         reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
-         reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
-         keep( rowIdx, sum );
-      }
-      else
-      {
-         keep( rowIdx, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
-      }
-   };
-   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );*/
 }
 
 template< typename Real,
@@ -442,45 +356,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 forRows( IndexType first, IndexType last, Function& function ) const
 {
    this->view.forRows( first, last, function );
-   /*const auto values_view = this->values.getConstView();
-   const auto indexer_ = this->indexer;
-   const auto rows = this->getRows();
-   const auto columns = this->getColumns();
-   const auto size = this->size;
-   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      //bool compute;
-      if( rowIdx == 0 )
-      {
-         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
-         function( 0, 1, rowIdx,     values_view[ i_0 ] );
-         function( 0, 2, rowIdx + 1, values_view[ i_1 ] );
-         return;
-      }
-      if( rowIdx < size || columns > rows )
-      {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
-         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
-         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
-         function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] );
-         return;
-      }
-      if( rows == columns )
-      {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
-         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
-      }
-      else
-      {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         function( rowIdx, 0, rowIdx, values_view[ i_0 ] );
-      }
-   };
-   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );*/
 }
 
 template< typename Real,
@@ -494,45 +369,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 forRows( IndexType first, IndexType last, Function& function )
 {
    this->view.forRows( first, last, function );
-   /*const auto values_view = this->values.getConstView();
-   const auto indexer_ = this->indexer;
-   const auto rows = this->getRows();
-   const auto columns = this->getColumns();
-   const auto size = this->size;
-   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      //bool compute;
-      if( rowIdx == 0 )
-      {
-         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
-         function( 0, 1, rowIdx,     values_view[ i_0 ] );
-         function( 0, 2, rowIdx + 1, values_view[ i_1 ] );
-         return;
-      }
-      if( rowIdx < size || columns > rows )
-      {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
-         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
-         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
-         function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] );
-         return;
-      }
-      if( rows == columns )
-      {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
-         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
-      }
-      else
-      {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         function( rowIdx, 0, rowIdx, values_view[ i_0 ] );
-      }
-   };
-   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );*/
 }
 
 template< typename Real,
@@ -573,11 +409,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 rowVectorProduct( const IndexType row, const Vector& vector ) const
 {
    return this->view.rowVectorProduct();
-   /*return TridiagonalDeviceDependentCode< Device >::
-             rowVectorProduct( this->rows,
-                               this->values,
-                               row,
-                               vector );*/
 }
 
 template< typename Real,
@@ -592,14 +423,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 vectorProduct( const InVector& inVector, OutVector& outVector ) const
 {
    this->view.vectorProduct( inVector, outVector );
-   /*TNL_ASSERT( this->getColumns() == inVector.getSize(),
-            std::cerr << "Matrix columns: " << this->getColumns() << std::endl
-                 << "Vector size: " << inVector.getSize() << std::endl );
-   TNL_ASSERT( this->getRows() == outVector.getSize(),
-               std::cerr << "Matrix rows: " << this->getRows() << std::endl
-                    << "Vector size: " << outVector.getSize() << std::endl );*/
-
-   //DeviceDependentCode::vectorProduct( *this, inVector, outVector );
 }
 
 template< typename Real,
@@ -614,14 +437,7 @@ addMatrix( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAlloca
            const RealType& matrixMultiplicator,
            const RealType& thisMatrixMultiplicator )
 {
-   TNL_ASSERT( this->getRows() == matrix.getRows(),
-            std::cerr << "This matrix columns: " << this->getColumns() << std::endl
-                 << "This matrix rows: " << this->getRows() << std::endl );
-
-   if( thisMatrixMultiplicator == 1.0 )
-      this->values += matrixMultiplicator * matrix.values;
-   else
-      this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values;
+   this->view.addMatrix( matrix.getView(), matrixMultiplicator, thisMatrixMultiplicator );
 }
 
 #ifdef HAVE_CUDA
@@ -753,11 +569,31 @@ operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAlloca
                   "unknown device" );
 
    this->setLike( matrix );
-
-   throw Exceptions::NotImplementedError("Cross-device assignment for the Tridiagonal format is not implemented yet.");
+   if( RowMajorOrder == RowMajorOrder_ )
+      this->values = matrix.getValues();
+   else
+   {
+      if( std::is_same< Device, Device_ >::value )
+      {
+         const auto matrix_view = matrix.getView();
+         auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+            value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+         };
+         this->forAllRows( f );
+      }
+      else
+      {
+         Tridiagonal< Real, Device, Index, RowMajorOrder_ > auxMatrix;
+         auxMatrix = matrix;
+         const auto matrix_view = auxMatrix.getView();
+         auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+            value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+         };
+         this->forAllRows( f );
+      }
+   }
 }
 
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -777,6 +613,7 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( Fil
 {
    Matrix< Real, Device, Index >::load( file );
    this->indexer.setDimensions( this->getRows(), this->getColumns() );
+   this->view = this->getView();
 }
 
 template< typename Real,
@@ -804,7 +641,9 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::print( std::ostream& str ) const
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+print( std::ostream& str ) const
 {
    for( IndexType row = 0; row < this->getRows(); row++ )
    {
@@ -816,6 +655,30 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::print( st
    }
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+auto
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getIndexer() const -> const IndexerType&
+{
+   return this->indexer;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+auto
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getIndexer() -> IndexerType&
+{
+   return this->indexer;
+}
+
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h
index 78593acf5..290062793 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.h
+++ b/src/TNL/Matrices/TridiagonalMatrixView.h
@@ -146,6 +146,12 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
 
       void print( std::ostream& str ) const;
 
+      __cuda_callable__
+      const IndexerType& getIndexer() const;
+
+      __cuda_callable__
+      IndexerType& getIndexer();
+
    protected:
 
       __cuda_callable__
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp
index 83ff6035d..4d4950c4e 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp
@@ -297,7 +297,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
          keep( 0, sum );
          return;
       }
-      if( rowIdx < indexer.getSize() || indexer.getColumns() > indexer.getRows() )
+      if( rowIdx + 1 < indexer.getColumns() )
       {
          reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
          reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
@@ -305,7 +305,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
          keep( rowIdx, sum );
          return;
       }
-      if( indexer.getRows() == indexer.getColumns() )
+      if( rowIdx < indexer.getColumns() )
       {
          reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
          reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
@@ -313,7 +313,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
       }
       else
       {
-         keep( rowIdx, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
+         keep( rowIdx, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
       }
    };
    Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
@@ -328,7 +328,7 @@ void
 TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+   this->rowsReduction( 0, this->indexer.getNonEmptyRowsCount(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -341,42 +341,26 @@ TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 forRows( IndexType first, IndexType last, Function& function ) const
 {
    const auto values_view = this->values.getConstView();
-   const auto indexer_ = this->indexer;
-   const auto rows = this->getRows();
-   const auto columns = this->getColumns();
-   const auto size = this->size;
+   const auto indexer = this->indexer;
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      //bool compute;
       if( rowIdx == 0 )
       {
-         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
-         function( 0, 1, rowIdx,     values_view[ i_0 ] );
-         function( 0, 2, rowIdx + 1, values_view[ i_1 ] );
-         return;
-      }
-      if( rowIdx < size || columns > rows )
+         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] );
+         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] );
+      } 
+      else if( rowIdx + 1 < indexer.getColumns() )
       {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
-         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
-         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
-         function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] );
-         return;
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
+         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] );
       }
-      if( rows == columns )
+      else if( rowIdx < indexer.getColumns() )
       {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
-         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
       }
       else
-      {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         function( rowIdx, 0, rowIdx, values_view[ i_0 ] );
-      }
+         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
    };
    Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
@@ -390,43 +374,27 @@ void
 TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 forRows( IndexType first, IndexType last, Function& function )
 {
-   const auto values_view = this->values.getConstView();
-   const auto indexer_ = this->indexer;
-   const auto rows = this->getRows();
-   const auto columns = this->getColumns();
-   const auto size = this->size;
+   auto values_view = this->values.getView();
+   const auto indexer = this->indexer;
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      //bool compute;
       if( rowIdx == 0 )
       {
-         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
-         function( 0, 1, rowIdx,     values_view[ i_0 ] );
-         function( 0, 2, rowIdx + 1, values_view[ i_1 ] );
-         return;
-      }
-      if( rowIdx < size || columns > rows )
+         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] );
+         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] );
+      } 
+      else if( rowIdx + 1 < indexer.getColumns() )
       {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
-         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
-         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
-         function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] );
-         return;
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
+         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] );
       }
-      if( rows == columns )
+      else if( rowIdx < indexer.getColumns() )
       {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
-         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
       }
       else
-      {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         function( rowIdx, 0, rowIdx, values_view[ i_0 ] );
-      }
+         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
    };
    Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
@@ -440,7 +408,7 @@ void
 TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 forAllRows( Function& function ) const
 {
-   this->forRows( 0, this->getRows(), function );
+   this->forRows( 0, this->indxer.getNonEmptyRowsCount(), function );
 }
 
 template< typename Real,
@@ -452,7 +420,7 @@ void
 TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 forAllRows( Function& function )
 {
-   this->forRows( 0, this->getRows(), function );
+   this->forRows( 0, this->indexer.getNonEmptyRowsCount(), function );
 }
 
 template< typename Real,
@@ -477,14 +445,22 @@ void
 TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 vectorProduct( const InVector& inVector, OutVector& outVector ) const
 {
-   TNL_ASSERT( this->getColumns() == inVector.getSize(),
-            std::cerr << "Matrix columns: " << this->getColumns() << std::endl
-                 << "Vector size: " << inVector.getSize() << std::endl );
-   TNL_ASSERT( this->getRows() == outVector.getSize(),
-               std::cerr << "Matrix rows: " << this->getRows() << std::endl
-                    << "Vector size: " << outVector.getSize() << std::endl );
+   TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." );
+   TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." );
 
-   //DeviceDependentCode::vectorProduct( *this, inVector, outVector );
+   const auto inVectorView = inVector.getConstView();
+   auto outVectorView = outVector.getView();
+   const auto valuesView = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType& row, const IndexType& column, const RealType& value ) -> RealType {
+      return value * inVectorView[ column ];
+   };
+   auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+      sum += value;
+   };
+   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      outVectorView[ row ] = value;
+   };
+   this->allRowsReduction( fetch, reduction, keeper, ( RealType ) 0.0 );
 }
 
 template< typename Real,
@@ -498,18 +474,41 @@ addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >
            const RealType& matrixMultiplicator,
            const RealType& thisMatrixMultiplicator )
 {
-   TNL_ASSERT( this->getRows() == matrix.getRows(),
-            std::cerr << "This matrix columns: " << this->getColumns() << std::endl
-                 << "This matrix rows: " << this->getRows() << std::endl );
+   TNL_ASSERT_EQ( this->getRows(), matrix.getRows(), "Matrices rows are not equal." );
+   TNL_ASSERT_EQ( this->getColumns(), matrix.getColumns(), "Matrices columns are not equal." );
 
-   if( thisMatrixMultiplicator == 1.0 )
-      this->values += matrixMultiplicator * matrix.values;
+   if( RowMajorOrder == RowMajorOrder_ )
+   {
+      if( thisMatrixMultiplicator == 1.0 )
+         this->values += matrixMultiplicator * matrix.getValues();
+      else
+         this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.getValues();
+   }
    else
-      this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values;
+   {
+      const auto matrix_view = matrix;
+      const auto matrixMult = matrixMultiplicator;
+      const auto thisMult = thisMatrixMultiplicator;
+      auto add0 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+         value = matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+      };
+      auto add1 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+         value += matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+      };
+      auto addGen = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+         value = thisMult * value + matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+      };
+      if( thisMult == 0.0 )
+         this->forAllRows( add0 );
+      else if( thisMult == 1.0 )
+         this->forAllRows( add1 );
+      else
+         this->forAllRows( addGen );
+   }
 }
 
 #ifdef HAVE_CUDA
-template< typename Real,
+/*template< typename Real,
           typename Real2,
           typename Index,
           typename Index2 >
@@ -533,7 +532,7 @@ __global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, De
                                     rowIdx,
                                     matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) );
    }
-}
+}*/
 #endif
 
 template< typename Real,
@@ -563,7 +562,7 @@ getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix,
    if( std::is_same< Device, Devices::Cuda >::value )
    {
 #ifdef HAVE_CUDA
-      Tridiagonal* kernel_this = Cuda::passToDevice( *this );
+      /*Tridiagonal* kernel_this = Cuda::passToDevice( *this );
       typedef  Tridiagonal< Real2, Device, Index2 > InMatrixType;
       InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix );
       dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
@@ -581,7 +580,7 @@ getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix,
       }
       Cuda::freeFromDevice( kernel_this );
       Cuda::freeFromDevice( kernel_inMatrix );
-      TNL_CHECK_CUDA_DEVICE;
+      TNL_CHECK_CUDA_DEVICE;*/
 #endif
    }
 }
@@ -644,6 +643,30 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::os
    }
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getIndexer() const -> const IndexerType&
+{
+   return this->indexer;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getIndexer() -> IndexerType&
+{
+   return this->indexer;
+}
+
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
index d9fdd0c23..6d3377b4f 100644
--- a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
+++ b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
@@ -65,7 +65,7 @@ class TridiagonalMatrixIndexer
       const IndexType& getColumns() const { return this->columns; };
 
       __cuda_callable__
-      const IndexType& getSize() const { return this->nonEmptyRows; };
+      const IndexType& getNonEmptyRowsCount() const { return this->nonEmptyRows; };
       __cuda_callable__
       IndexType getStorageSize() const { return 3 * this->nonEmptyRows; };
 
diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index 287495405..4b95380c4 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -13,8 +13,8 @@ IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
+#   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
    CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} )
@@ -42,9 +42,9 @@ ELSE(  BUILD_CUDA )
    TARGET_COMPILE_OPTIONS( TridiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
-   ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cpp )
-   TARGET_COMPILE_OPTIONS( MultidiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
+#   ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cpp )
+#   TARGET_COMPILE_OPTIONS( MultidiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
    ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cpp )
    TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR_segments PRIVATE ${CXX_TESTS_FLAGS} )
@@ -65,7 +65,7 @@ ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${C
 ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( TridiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
+#ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 
 ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest_Ellpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} )
diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
index dcd14302a..2c476670b 100644
--- a/src/UnitTests/Matrices/TridiagonalMatrixTest.h
+++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
@@ -8,6 +8,7 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
+#include <sstream>
 #include <TNL/Devices/Host.h>
 #include <TNL/Matrices/Matrix.h>
 #include <TNL/Matrices/Tridiagonal.h>
@@ -774,8 +775,11 @@ void test_VectorProduct()
    RealType value = 1;
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < cols; j++)
+      {
          if( abs( i - j ) <= 1 )
-            m.setElement( i, j, value++ );
+            m.setElement( i, j, value );
+         value++;
+      }
 
    using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
 
@@ -787,7 +791,6 @@ void test_VectorProduct()
 
    m.vectorProduct( inVector, outVector);
 
-   std::cerr << outVector << std::endl;
    EXPECT_EQ( outVector.getElement( 0 ),  6 );
    EXPECT_EQ( outVector.getElement( 1 ), 36 );
    EXPECT_EQ( outVector.getElement( 2 ), 66 );
@@ -795,122 +798,123 @@ void test_VectorProduct()
    EXPECT_EQ( outVector.getElement( 4 ), 40 );
 }
 
-template< typename Matrix >
+template< typename Matrix1, typename Matrix2 = Matrix1 >
 void test_AddMatrix()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 5x4 dense matrix:
- *
- *    /  1  2  3  4 \
- *    |  5  6  7  8 |
- *    |  9 10 11 12 |
- *    | 13 14 15 16 |
- *    \ 17 18 19 20 /
- */
-    const IndexType rows = 5;
-    const IndexType cols = 4;
+   using RealType = typename Matrix1::RealType;
+   using DeviceType = typename Matrix1::DeviceType;
+   using IndexType = typename Matrix1::IndexType;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   /*
+    * Sets up the following 5x4 dense matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  5  6  7  0 |
+    *    |  0 10 11 12 |
+    *    |  0  0 15 16 |
+    *    \  0  0  0 20 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
 
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++)
-            m.setElement( i, j, value++ );
+   Matrix1 m( rows, cols );
 
-/*
- * Sets up the following 5x4 dense matrix:
- *
- *    /  1  2  3  4 \
- *    |  5  6  7  8 |
- *    |  9 10 11 12 |
- *    | 13 14 15 16 |
- *    \ 17 18 19 20 /
- */
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++)
+      {
+         if( abs( i - j ) <= 1 )
+            m.setElement( i, j, value );
+         value++;
+      }
 
-    Matrix m2;
-    m2.reset();
-    m2.setDimensions( rows, cols );
+   /*
+    * Sets up the following 5x4 dense matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  3  4  5  0 |
+    *    |  0  6  7  8 |
+    *    |  0  0  9 10 |
+    *    \  0  0  0 11 /
+    */
+   Matrix2 m2( rows, cols );
 
-    RealType newValue = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++)
+   RealType newValue = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++)
+         if( abs( i - j ) <= 1 )
             m2.setElement( i, j, newValue++ );
 
-    /*
- * Sets up the following 5x4 dense matrix:
- *
- *    /  1  2  3  4 \
- *    |  5  6  7  8 |
- *    |  9 10 11 12 |
- *    | 13 14 15 16 |
- *    \ 17 18 19 20 /
- */
+   /*
+    * Compute the following 5x4 dense matrix:
+    *
+    *  /  1  2  0  0 \       /  1  2  0  0 \    /  3  6  0  0 \
+    *  |  5  6  7  0 |       |  3  4  5  0 |    | 11 14 17  0 |
+    *  |  0 10 11 12 | + 2 * |  0  6  7  8 | =  |  0 22 25 28 |
+    *  |  0  0 15 16 |       |  0  0  9 10 |    |  0  0 33 36 |
+    *  \  0  0  0 20 /       \  0  0  0 11 /    \  0  0  0 42 /
+    */
 
-    Matrix mResult;
-    mResult.reset();
-    mResult.setDimensions( rows, cols );
-
-    mResult = m;
-
-    RealType matrixMultiplicator = 2;
-    RealType thisMatrixMultiplicator = 1;
-
-    mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator );
-
-    EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) );
-    EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) );
-    EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) );
-    EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) );
-
-    EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) );
-    EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) );
-    EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) );
-    EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) );
-
-    EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) );
-    EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) );
-    EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) );
-    EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) );
-
-    EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) );
-    EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) );
-    EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) );
-    EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) );
-
-    EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) );
-    EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) );
-    EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) );
-    EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) );
-
-    EXPECT_EQ( mResult.getElement( 0, 0 ),  3 );
-    EXPECT_EQ( mResult.getElement( 0, 1 ),  6 );
-    EXPECT_EQ( mResult.getElement( 0, 2 ),  9 );
-    EXPECT_EQ( mResult.getElement( 0, 3 ), 12 );
-
-    EXPECT_EQ( mResult.getElement( 1, 0 ), 15 );
-    EXPECT_EQ( mResult.getElement( 1, 1 ), 18 );
-    EXPECT_EQ( mResult.getElement( 1, 2 ), 21 );
-    EXPECT_EQ( mResult.getElement( 1, 3 ), 24 );
-
-    EXPECT_EQ( mResult.getElement( 2, 0 ), 27 );
-    EXPECT_EQ( mResult.getElement( 2, 1 ), 30 );
-    EXPECT_EQ( mResult.getElement( 2, 2 ), 33 );
-    EXPECT_EQ( mResult.getElement( 2, 3 ), 36 );
-
-    EXPECT_EQ( mResult.getElement( 3, 0 ), 39 );
-    EXPECT_EQ( mResult.getElement( 3, 1 ), 42 );
-    EXPECT_EQ( mResult.getElement( 3, 2 ), 45 );
-    EXPECT_EQ( mResult.getElement( 3, 3 ), 48 );
-
-    EXPECT_EQ( mResult.getElement( 4, 0 ), 51 );
-    EXPECT_EQ( mResult.getElement( 4, 1 ), 54 );
-    EXPECT_EQ( mResult.getElement( 4, 2 ), 57 );
-    EXPECT_EQ( mResult.getElement( 4, 3 ), 60 );
+   Matrix1 mResult;
+   mResult.reset();
+   mResult.setDimensions( rows, cols );
+
+   mResult = m;
+
+   RealType matrixMultiplicator = 2;
+   RealType thisMatrixMultiplicator = 1;
+
+   mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator );
+
+   EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) );
+   EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) );
+   EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) );
+   EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) );
+   EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) );
+   EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) );
+   EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) );
+   EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) );
+   EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) );
+   EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) );
+   EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) );
+   EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) );
+   EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) );
+   EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) );
+   EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) );
+   EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 0, 0 ),  3 );
+   EXPECT_EQ( mResult.getElement( 0, 1 ),  6 );
+   EXPECT_EQ( mResult.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( mResult.getElement( 0, 3 ),  0 );
+
+   EXPECT_EQ( mResult.getElement( 1, 0 ), 11 );
+   EXPECT_EQ( mResult.getElement( 1, 1 ), 14 );
+   EXPECT_EQ( mResult.getElement( 1, 2 ), 17 );
+   EXPECT_EQ( mResult.getElement( 1, 3 ),  0 );
+
+   EXPECT_EQ( mResult.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( mResult.getElement( 2, 1 ), 22 );
+   EXPECT_EQ( mResult.getElement( 2, 2 ), 25 );
+   EXPECT_EQ( mResult.getElement( 2, 3 ), 28 );
+
+   EXPECT_EQ( mResult.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( mResult.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( mResult.getElement( 3, 2 ), 33 );
+   EXPECT_EQ( mResult.getElement( 3, 3 ), 36 );
+
+   EXPECT_EQ( mResult.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( mResult.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( mResult.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( mResult.getElement( 4, 3 ), 42 );
 }
 
 template< typename Matrix >
@@ -1162,43 +1166,44 @@ void test_AssignmentOperator()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
+   constexpr bool rowMajorOrder = Matrix::getRowMajorOrder();
 
-   using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >;
-   using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+   using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType, rowMajorOrder >;
+   using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType, !rowMajorOrder >;
 
    const IndexType rows( 10 ), columns( 10 );
    TridiagonalHost hostMatrix( rows, columns );
-   for( IndexType i = 0; i < columns; i++ )
-      for( IndexType j = 0; j <= i; j++ )
-         hostMatrix.setElement( i, j,  i + j );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j <  columns; j++ )
+         if( abs( i - j ) <= 1 )
+            hostMatrix.setElement( i, j,  i + j );
 
    Matrix matrix( rows, columns );
    matrix.getValues() = 0.0;
    matrix = hostMatrix;
    for( IndexType i = 0; i < columns; i++ )
       for( IndexType j = 0; j < rows; j++ )
-      {
-         if( j > i )
-            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
-         else
-            EXPECT_EQ( matrix.getElement( i, j ), i + j );
-      }
+            if( abs( i - j ) <= 1 )
+               EXPECT_EQ( matrix.getElement( i, j ), i + j );
+            else
+               EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
 
 #ifdef HAVE_CUDA
    TridiagonalCuda cudaMatrix( rows, columns );
-   for( IndexType i = 0; i < columns; i++ )
-      for( IndexType j = 0; j <= i; j++ )
-         cudaMatrix.setElement( i, j, i + j );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+         if( abs( i - j ) <= 1 )
+            cudaMatrix.setElement( i, j, i + j );
 
    matrix.getValues() = 0.0;
    matrix = cudaMatrix;
-   for( IndexType i = 0; i < columns; i++ )
-      for( IndexType j = 0; j < rows; j++ )
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
       {
-         if( j > i )
-            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
-         else
+         if( abs( i - j ) <= 1 )
             EXPECT_EQ( matrix.getElement( i, j ), i + j );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
       }
 #endif
 }
@@ -1207,123 +1212,125 @@ void test_AssignmentOperator()
 template< typename Matrix >
 void test_SaveAndLoad()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 4x4 dense matrix:
- *
- *    /  1  2  3  4 \
- *    |  5  6  7  8 |
- *    |  9 10 11 12 |
- *    \ 13 14 15 16 /
- */
-    const IndexType rows = 4;
-    const IndexType cols = 4;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    Matrix savedMatrix;
-    savedMatrix.reset();
-    savedMatrix.setDimensions( rows, cols );
+   /*
+    * Sets up the following 4x4 dense matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  5  6  7  0 |
+    *    |  0 10 11 12 |
+    *    \  0  0 15 16 /
+    */
+   const IndexType rows = 4;
+   const IndexType cols = 4;
 
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++ )
-            savedMatrix.setElement( i, j, value++ );
-
-    ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) );
-
-    Matrix loadedMatrix;
-    loadedMatrix.reset();
-    loadedMatrix.setDimensions( rows, cols );
-
-    ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) );
-
-    EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
-
-    EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
-
-    EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
-
-    EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
-
-    EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
-    EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
-    EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
-    EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  4 );
-
-    EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  5 );
-    EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  6 );
-    EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  7 );
-    EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  8 );
-
-    EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  9 );
-    EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 );
-    EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 );
-    EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 );
-
-    EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 13 );
-    EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 14 );
-    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 );
-    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 );
+   Matrix savedMatrix( rows, cols );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+      {
+         if( abs( i - j ) <= 1 )
+            savedMatrix.setElement( i, j, value );
+         value++;
+      }
+
+   ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) );
+
+   Matrix loadedMatrix;
+
+   ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  5 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  6 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  7 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 );
 }
 
 template< typename Matrix >
 void test_Print()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 5x4 sparse matrix:
- *
- *    /  1  2  3  4 \
- *    |  5  6  7  8 |
- *    |  9 10 11 12 |
- *    | 13 14 15 16 |
- *    \ 17 18 19 20 /
- */
-    const IndexType rows = 5;
-    const IndexType cols = 4;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   /*
+    * Sets up the following 5x4 sparse matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  5  6  7  0 |
+    *    |  0 10 11 12 |
+    *    |  0  0 15 16 |
+    *    \  0  0  0 20 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
 
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++)
-        for( IndexType j = 0; j < cols; j++)
-            m.setElement( i, j, value++ );
+   Matrix m( rows, cols );
 
-    #include <sstream>
-    std::stringstream printed;
-    std::stringstream couted;
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++)
+      for( IndexType j = 0; j < cols; j++)
+      {
+         if( abs( i - j ) <= 1 )
+            m.setElement( i, j, value );
+         value++;
+      }
 
-    //change the underlying buffer and save the old buffer
-    auto old_buf = std::cout.rdbuf(printed.rdbuf());
+   std::stringstream printed;
+   std::stringstream couted;
 
-    m.print( std::cout ); //all the std::cout goes to ss
+   //change the underlying buffer and save the old buffer
+   auto old_buf = std::cout.rdbuf(printed.rdbuf());
 
-    std::cout.rdbuf(old_buf); //reset
+   m.print( std::cout ); //all the std::cout goes to ss
 
-    couted << "Row: 0 ->  Col:0->1	 Col:1->2	 Col:2->3	 Col:3->4\t\n"
-              "Row: 1 ->  Col:0->5	 Col:1->6	 Col:2->7	 Col:3->8\t\n"
-              "Row: 2 ->  Col:0->9	 Col:1->10	 Col:2->11	 Col:3->12\t\n"
-              "Row: 3 ->  Col:0->13	 Col:1->14	 Col:2->15	 Col:3->16\t\n"
-              "Row: 4 ->  Col:0->17	 Col:1->18	 Col:2->19	 Col:3->20\t\n";
+   std::cout.rdbuf(old_buf); //reset
+   couted << "Row: 0 ->  Col:0->1\t Col:1->2\t\n"
+             "Row: 1 ->  Col:0->5\t Col:1->6\t Col:2->7\t\n"
+             "Row: 2 ->  Col:1->10\t Col:2->11\t Col:3->12\t\n"
+             "Row: 3 ->  Col:2->15\t Col:3->16\t\n"
+             "Row: 4 ->  Col:3->20\t\n";
 
-    EXPECT_EQ( printed.str(), couted.str() );
+   EXPECT_EQ( printed.str(), couted.str() );
 }
 
 // test fixture for typed tests
@@ -1470,6 +1477,19 @@ TYPED_TEST( MatrixTest, addMatrixTest )
     test_AddMatrix< MatrixType >();
 }
 
+TYPED_TEST( MatrixTest, addMatrixTest_differentOrdering )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    using RealType = typename MatrixType::RealType;
+    using DeviceType = typename MatrixType::DeviceType;
+    using IndexType = typename MatrixType::IndexType;
+    using RealAllocatorType = typename MatrixType::RealAllocatorType;
+    using MatrixType2 = TNL::Matrices::Tridiagonal< RealType, DeviceType, IndexType, ! MatrixType::getRowMajorOrder(), RealAllocatorType >;
+
+    test_AddMatrix< MatrixType, MatrixType2 >();
+}
+
 TYPED_TEST( MatrixTest, assignmentOperatorTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-- 
GitLab


From 97de6cd9c525615b191a5ead10d2746f0c8e8060 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sat, 11 Jan 2020 13:48:13 +0100
Subject: [PATCH 084/179] Commenting DistributedMatrixTest of
 getCompressedRowLength - it does not work with the new meaning of the method.

---
 src/UnitTests/Matrices/DistributedMatrixTest.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/UnitTests/Matrices/DistributedMatrixTest.h b/src/UnitTests/Matrices/DistributedMatrixTest.h
index a1a9f3eb8..9487e5977 100644
--- a/src/UnitTests/Matrices/DistributedMatrixTest.h
+++ b/src/UnitTests/Matrices/DistributedMatrixTest.h
@@ -171,7 +171,7 @@ TYPED_TEST( DistributedMatrixTest, getCompressedRowLengths )
 
    this->matrix.setCompressedRowLengths( this->rowLengths );
    RowLengthsVector output;
-   this->matrix.getCompressedRowLengths( output );
+   this->matrix.getCompressedRowLengths( output ); // TODO: replace this with getRowCapacities
    EXPECT_EQ( output, this->rowLengths );
 }
 
-- 
GitLab


From 7104d153305e1f8e3ee9aae3ead38db516cb5577 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sat, 11 Jan 2020 13:50:25 +0100
Subject: [PATCH 085/179] Renaming Matrix::getNumberOfMetrixElements to
 getAllocatedElementdCount.

---
 src/Python/pytnl/tnl/SparseMatrix.h   | 2 +-
 src/TNL/Matrices/Legacy/Sparse_impl.h | 2 +-
 src/TNL/Matrices/MatrixReader_impl.h  | 2 +-
 src/TNL/Matrices/SparseMatrix.hpp     | 3 +++
 4 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/Python/pytnl/tnl/SparseMatrix.h b/src/Python/pytnl/tnl/SparseMatrix.h
index 03ec5814c..b4cc0fc1a 100644
--- a/src/Python/pytnl/tnl/SparseMatrix.h
+++ b/src/Python/pytnl/tnl/SparseMatrix.h
@@ -72,7 +72,7 @@ void export_Matrix( py::module & m, const char* name )
         .def("getCompressedRowLengths", _getCompressedRowLengths)
         // TODO: export for more types
         .def("setLike",                 &Matrix::template setLike< typename Matrix::RealType, typename Matrix::DeviceType, typename Matrix::IndexType >)
-        .def("getNumberOfMatrixElements", &Matrix::getNumberOfMatrixElements)
+        .def("getAllocatedElementsCount", &Matrix::getAllocatedElementsCount)
         .def("getNumberOfNonzeroMatrixElements", &Matrix::getNumberOfNonzeroMatrixElements)
         .def("reset",                   &Matrix::reset)
         .def("getRows",                 &Matrix::getRows)
diff --git a/src/TNL/Matrices/Legacy/Sparse_impl.h b/src/TNL/Matrices/Legacy/Sparse_impl.h
index 889d92e62..3e4794412 100644
--- a/src/TNL/Matrices/Legacy/Sparse_impl.h
+++ b/src/TNL/Matrices/Legacy/Sparse_impl.h
@@ -33,7 +33,7 @@ template< typename Real,
 void Sparse< Real, Device, Index >::setLike( const Sparse< Real2, Device2, Index2 >& matrix )
 {
    Matrix< Real, Device, Index >::setLike( matrix );
-   this->allocateMatrixElements( matrix.getNumberOfMatrixElements() );
+   this->allocateMatrixElements( matrix.getAllocatedElementsCount() );
 }
 
 
diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h
index 476a7327e..a80d00283 100644
--- a/src/TNL/Matrices/MatrixReader_impl.h
+++ b/src/TNL/Matrices/MatrixReader_impl.h
@@ -340,7 +340,7 @@ void MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
    long int fileSize = file.tellg();
    timer.stop();
    if( verbose )
-     std::cout << " Reading the matrix elements ... " << processedElements << " / " << matrix.getNumberOfMatrixElements()
+     std::cout << " Reading the matrix elements ... " << processedElements << " / " << matrix.getAllocatedElementsCount()
               << " -> " << timer.getRealTime()
               << " sec. i.e. " << fileSize / ( timer.getRealTime() * ( 1 << 20 ))  << "MB/s." << std::endl;
 }
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 6189d43d3..3f5636bb6 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -412,6 +412,9 @@ vectorProduct( const InVector& inVector,
                const RealType& matrixMultiplicator,
                const RealType& inVectorAddition ) const
 {
+   TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." );
+   TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." );
+
    const auto inVectorView = inVector.getConstView();
    auto outVectorView = outVector.getView();
    const auto valuesView = this->values.getConstView();
-- 
GitLab


From 43533ef1b605924e2bdc6ee4b36021b1c6a6b49b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sat, 11 Jan 2020 13:52:16 +0100
Subject: [PATCH 086/179] Making MatrixView::getValues() __cuda_callable__.

---
 src/TNL/Matrices/MatrixView.h   | 2 ++
 src/TNL/Matrices/MatrixView.hpp | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h
index b8adfd791..467d02349 100644
--- a/src/TNL/Matrices/MatrixView.h
+++ b/src/TNL/Matrices/MatrixView.h
@@ -85,8 +85,10 @@ public:
    virtual Real getElement( const IndexType row,
                             const IndexType column ) const = 0;
 
+   __cuda_callable__
    const ValuesView& getValues() const;
 
+   __cuda_callable__
    ValuesView& getValues();
 
    /**
diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp
index b2739ae1d..275a22870 100644
--- a/src/TNL/Matrices/MatrixView.hpp
+++ b/src/TNL/Matrices/MatrixView.hpp
@@ -102,6 +102,7 @@ Index MatrixView< Real, Device, Index >::getColumns() const
 template< typename Real,
           typename Device,
           typename Index >
+__cuda_callable__
 const typename MatrixView< Real, Device, Index >::ValuesView&
 MatrixView< Real, Device, Index >::
 getValues() const
@@ -112,6 +113,7 @@ getValues() const
 template< typename Real,
           typename Device,
           typename Index >
+__cuda_callable__
 typename MatrixView< Real, Device, Index >::ValuesView& 
 MatrixView< Real, Device, Index >::
 getValues()
-- 
GitLab


From 897b35c3b1548f07fc60962592765a025d88d074 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sat, 11 Jan 2020 13:53:20 +0100
Subject: [PATCH 087/179] Commenting out dated implementation of
 Matrix::getCompressedRowLengths.

---
 src/TNL/Matrices/Matrix.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp
index efd26e1fa..4ddbacde5 100644
--- a/src/TNL/Matrices/Matrix.hpp
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -72,9 +72,9 @@ template< typename Real,
           typename RealAllocator >
 void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
 {
-   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
-   for( IndexType row = 0; row < this->getRows(); row++ )
-      rowLengths.setElement( row, this->getRowLength( row ) );
+   //TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
+   //for( IndexType row = 0; row < this->getRows(); row++ )
+   //   rowLengths.setElement( row, this->getRowLength( row ) );
 }
 
 template< typename Real,
-- 
GitLab


From e9fc173c4ebc43475983c7dfbadcc39ed44bb8df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sat, 11 Jan 2020 17:13:53 +0100
Subject: [PATCH 088/179] Moving Multidiagonal matrix to Legacy.

---
 src/TNL/Matrices/{ => Legacy}/Multidiagonal.h                 | 4 ++--
 src/TNL/Matrices/{ => Legacy}/MultidiagonalMatrixSetter.h     | 4 ++--
 .../Matrices/{ => Legacy}/MultidiagonalMatrixSetter_impl.h    | 0
 src/TNL/Matrices/{ => Legacy}/MultidiagonalRow.h              | 2 +-
 src/TNL/Matrices/{ => Legacy}/MultidiagonalRow_impl.h         | 0
 src/TNL/Matrices/{ => Legacy}/Multidiagonal_impl.h            | 2 +-
 src/TNL/Problems/HeatEquationProblem_impl.h                   | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)
 rename src/TNL/Matrices/{ => Legacy}/Multidiagonal.h (98%)
 rename src/TNL/Matrices/{ => Legacy}/MultidiagonalMatrixSetter.h (96%)
 rename src/TNL/Matrices/{ => Legacy}/MultidiagonalMatrixSetter_impl.h (100%)
 rename src/TNL/Matrices/{ => Legacy}/MultidiagonalRow.h (96%)
 rename src/TNL/Matrices/{ => Legacy}/MultidiagonalRow_impl.h (100%)
 rename src/TNL/Matrices/{ => Legacy}/Multidiagonal_impl.h (99%)

diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Legacy/Multidiagonal.h
similarity index 98%
rename from src/TNL/Matrices/Multidiagonal.h
rename to src/TNL/Matrices/Legacy/Multidiagonal.h
index 1ee6a25e9..d9f1379f7 100644
--- a/src/TNL/Matrices/Multidiagonal.h
+++ b/src/TNL/Matrices/Legacy/Multidiagonal.h
@@ -12,7 +12,7 @@
 
 #include <TNL/Matrices/Matrix.h>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Matrices/MultidiagonalRow.h>
+#include <TNL/Matrices/Legacy/MultidiagonalRow.h>
 
 namespace TNL {
 namespace Matrices {   
@@ -221,4 +221,4 @@ protected:
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/Multidiagonal_impl.h>
+#include <TNL/Matrices/Legacy/Multidiagonal_impl.h>
diff --git a/src/TNL/Matrices/MultidiagonalMatrixSetter.h b/src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter.h
similarity index 96%
rename from src/TNL/Matrices/MultidiagonalMatrixSetter.h
rename to src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter.h
index c10d0cc57..f9e7ef135 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixSetter.h
+++ b/src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter.h
@@ -11,7 +11,7 @@
 #pragma once
 
 #include <TNL/Meshes/Grid.h>
-#include <TNL/Matrices/Multidiagonal.h>
+#include <TNL/Matrices/Legacy/Multidiagonal.h>
 
 namespace TNL {
 namespace Matrices {   
@@ -85,4 +85,4 @@ class MultidiagonalMatrixSetter< Meshes::Grid< 3, MeshReal, Device, MeshIndex >
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/MultidiagonalMatrixSetter_impl.h>
+#include <TNL/Matrices/Legacy/MultidiagonalMatrixSetter_impl.h>
diff --git a/src/TNL/Matrices/MultidiagonalMatrixSetter_impl.h b/src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter_impl.h
similarity index 100%
rename from src/TNL/Matrices/MultidiagonalMatrixSetter_impl.h
rename to src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter_impl.h
diff --git a/src/TNL/Matrices/MultidiagonalRow.h b/src/TNL/Matrices/Legacy/MultidiagonalRow.h
similarity index 96%
rename from src/TNL/Matrices/MultidiagonalRow.h
rename to src/TNL/Matrices/Legacy/MultidiagonalRow.h
index 1d465d229..c41541ead 100644
--- a/src/TNL/Matrices/MultidiagonalRow.h
+++ b/src/TNL/Matrices/Legacy/MultidiagonalRow.h
@@ -54,5 +54,5 @@ class MultidiagonalRow
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/MultidiagonalRow_impl.h>
+#include <TNL/Matrices/Legacy/MultidiagonalRow_impl.h>
 
diff --git a/src/TNL/Matrices/MultidiagonalRow_impl.h b/src/TNL/Matrices/Legacy/MultidiagonalRow_impl.h
similarity index 100%
rename from src/TNL/Matrices/MultidiagonalRow_impl.h
rename to src/TNL/Matrices/Legacy/MultidiagonalRow_impl.h
diff --git a/src/TNL/Matrices/Multidiagonal_impl.h b/src/TNL/Matrices/Legacy/Multidiagonal_impl.h
similarity index 99%
rename from src/TNL/Matrices/Multidiagonal_impl.h
rename to src/TNL/Matrices/Legacy/Multidiagonal_impl.h
index 76f54f748..375e01c6d 100644
--- a/src/TNL/Matrices/Multidiagonal_impl.h
+++ b/src/TNL/Matrices/Legacy/Multidiagonal_impl.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/Multidiagonal.h>
+#include <TNL/Matrices/Legacy/Multidiagonal.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Math.h>
 #include <TNL/Exceptions/NotImplementedError.h>
diff --git a/src/TNL/Problems/HeatEquationProblem_impl.h b/src/TNL/Problems/HeatEquationProblem_impl.h
index bc339e9b3..98cd6d5e4 100644
--- a/src/TNL/Problems/HeatEquationProblem_impl.h
+++ b/src/TNL/Problems/HeatEquationProblem_impl.h
@@ -18,7 +18,7 @@
 
 #include <TNL/FileName.h>
 #include <TNL/Matrices/MatrixSetter.h>
-#include <TNL/Matrices/MultidiagonalMatrixSetter.h>
+#include <TNL/Matrices/Legacy/MultidiagonalMatrixSetter.h>
 #include <TNL/Logger.h>
 #include <TNL/Solvers/PDE/BoundaryConditionsSetter.h>
 
-- 
GitLab


From f40eb2d70fa1cd5f8ba729b93ca43df273f912cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sun, 12 Jan 2020 13:03:39 +0100
Subject: [PATCH 089/179] Fixing tridiagonal matrix unit tests comments.

---
 .../Matrices/TridiagonalMatrixTest.h          | 40 +++++++++----------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
index 2c476670b..d9dc06599 100644
--- a/src/UnitTests/Matrices/TridiagonalMatrixTest.h
+++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
@@ -182,7 +182,7 @@ void test_GetNumberOfNonzeroMatrixElements()
    using IndexType = typename Matrix::IndexType;
 
    /*
-    * Sets up the following 7x6 dense matrix:
+    * Sets up the following 7x6 matrix:
     *
     *    /  0  1  0  0  0  0 \
     *    |  2  3  4  0  0  0 |
@@ -215,7 +215,7 @@ void test_Reset()
    using IndexType = typename Matrix::IndexType;
 
    /*
-    * Sets up the following 5x4 dense matrix:
+    * Sets up the following 5x4 matrix:
     *
     *    /  0  0  0  0 \
     *    |  0  0  0  0 |
@@ -242,7 +242,7 @@ void test_SetValue()
    using IndexType = typename Matrix::IndexType;
 
    /*
-    * Sets up the following 7x6 dense matrix:
+    * Sets up the following 7x6 matrix:
     *
     *    /  0  1  0  0  0  0 \
     *    |  2  3  4  0  0  0 |
@@ -374,7 +374,7 @@ void test_SetElement()
    using IndexType = typename Matrix::IndexType;
 
    /*
-    * Sets up the following 5x5 dense matrix:
+    * Sets up the following 5x5 matrix:
     *
     *    /  1  2  0  0  0 \
     *    |  6  7  8  0  0 |
@@ -438,7 +438,7 @@ void test_AddElement()
    using IndexType = typename Matrix::IndexType;
 
    /*
-    * Sets up the following 6x5 dense matrix:
+    * Sets up the following 6x5 matrix:
     *
     *    /  1  2  0  0  0 \
     *    |  6  7  8  0  0 |
@@ -501,7 +501,7 @@ void test_AddElement()
 
    // Add new elements to the old elements with a multiplying factor applied to the old elements.
    /*
-    * The following setup results in the following 6x5 dense matrix:
+    * The following setup results in the following 6x5 matrix:
     *
     *     /  1  2  0  0  0 \    /  1  2  0  0  0 \   /  3  6  0  0  0 \
     *     |  6  7  8  0  0 |    |  3  4  5  0  0 |   | 15 18 21  0  0 |
@@ -563,7 +563,7 @@ void test_SetRow()
    using IndexType = typename Matrix::IndexType;
 
    /*
-    * Sets up the following 3x7 dense matrix:
+    * Sets up the following 3x7 matrix:
     *
     *    /  1  2  0  0  0  0  0 \
     *    |  8  9 10  0  0  0  0 |
@@ -622,7 +622,7 @@ void test_AddRow()
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
    /*
-    * Sets up the following 6x5 dense matrix:
+    * Sets up the following 6x5 matrix:
     *
     *    /  1  2  0  0  0 \
     *    |  6  7  8  0  0 |
@@ -759,7 +759,7 @@ void test_VectorProduct()
    using IndexType = typename Matrix::IndexType;
 
    /*
-    * Sets up the following 5x4 dense matrix:
+    * Sets up the following 5x4 matrix:
     *
     *    /  1  2  0  0 \
     *    |  5  6  7  0 |
@@ -806,7 +806,7 @@ void test_AddMatrix()
    using IndexType = typename Matrix1::IndexType;
 
    /*
-    * Sets up the following 5x4 dense matrix:
+    * Sets up the following 5x4 matrix:
     *
     *    /  1  2  0  0 \
     *    |  5  6  7  0 |
@@ -829,7 +829,7 @@ void test_AddMatrix()
       }
 
    /*
-    * Sets up the following 5x4 dense matrix:
+    * Sets up the following 5x4 matrix:
     *
     *    /  1  2  0  0 \
     *    |  3  4  5  0 |
@@ -846,7 +846,7 @@ void test_AddMatrix()
             m2.setElement( i, j, newValue++ );
 
    /*
-    * Compute the following 5x4 dense matrix:
+    * Compute the following 5x4 matrix:
     *
     *  /  1  2  0  0 \       /  1  2  0  0 \    /  3  6  0  0 \
     *  |  5  6  7  0 |       |  3  4  5  0 |    | 11 14 17  0 |
@@ -924,7 +924,7 @@ void test_GetMatrixProduct()
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
 /*
- * Sets up the following 5x4 dense matrix:
+ * Sets up the following 5x4 matrix:
  *
  *    /  1  2  3  4 \
  *    |  5  6  7  8 |
@@ -945,7 +945,7 @@ void test_GetMatrixProduct()
             leftMatrix.setElement( i, j, value++ );
 
 /*
- * Sets up the following 4x5 dense matrix:
+ * Sets up the following 4x5 matrix:
  *
  *    /  1  2  3  4  5 \
  *    |  6  7  8  9 10 |
@@ -965,7 +965,7 @@ void test_GetMatrixProduct()
             rightMatrix.setElement( i, j, newValue++ );
 
 /*
- * Sets up the following 5x5 resulting dense matrix:
+ * Sets up the following 5x5 resulting matrix:
  *
  *    /  0  0  0  0 \
  *    |  0  0  0  0 |
@@ -1029,7 +1029,7 @@ void test_GetTransposition()
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
 /*
- * Sets up the following 3x2 dense matrix:
+ * Sets up the following 3x2 matrix:
  *
  *    /  1  2 \
  *    |  3  4 |
@@ -1050,7 +1050,7 @@ void test_GetTransposition()
     m.print( std::cout );
 
 /*
- * Sets up the following 2x3 dense matrix:
+ * Sets up the following 2x3 matrix:
  *
  *    /  0  0  0 \
  *    \  0  0  0 /
@@ -1068,7 +1068,7 @@ void test_GetTransposition()
     mTransposed.print( std::cout );
 
 /*
- * Should result in the following 2x3 dense matrix:
+ * Should result in the following 2x3 matrix:
  *
  *    /  1  3  5 \
  *    \  2  4  6 /
@@ -1091,7 +1091,7 @@ void test_PerformSORIteration()
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
 /*
- * Sets up the following 4x4 dense matrix:
+ * Sets up the following 4x4  matrix:
  *
  *    /  4  1  1  1 \
  *    |  1  4  1  1 |
@@ -1217,7 +1217,7 @@ void test_SaveAndLoad()
    using IndexType = typename Matrix::IndexType;
 
    /*
-    * Sets up the following 4x4 dense matrix:
+    * Sets up the following 4x4 matrix:
     *
     *    /  1  2  0  0 \
     *    |  5  6  7  0 |
-- 
GitLab


From bac4cdfab2edee1613c286047ce7e38d787064f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sun, 12 Jan 2020 13:04:38 +0100
Subject: [PATCH 090/179] Revision of multidiagonal matrix.

---
 src/TNL/Matrices/Multidiagonal.h              |  217 +++
 src/TNL/Matrices/Multidiagonal.hpp            |  909 +++++++++
 src/TNL/Matrices/MultidiagonalMatrixRowView.h |   59 +
 .../Matrices/MultidiagonalMatrixRowView.hpp   |   75 +
 src/TNL/Matrices/MultidiagonalMatrixView.h    |  181 ++
 src/TNL/Matrices/MultidiagonalMatrixView.hpp  |  729 +++++++
 src/TNL/Matrices/TridiagonalMatrixView.h      |    3 -
 .../details/MultidiagonalMatrixIndexer.h      |  106 ++
 src/UnitTests/Matrices/CMakeLists.txt         |   12 +-
 .../Matrices/MultidiagonalMatrixTest.cpp      |    2 +-
 .../Matrices/MultidiagonalMatrixTest.cu       |    2 +-
 .../Matrices/MultidiagonalMatrixTest.h        | 1679 +++++++++--------
 12 files changed, 3169 insertions(+), 805 deletions(-)
 create mode 100644 src/TNL/Matrices/Multidiagonal.h
 create mode 100644 src/TNL/Matrices/Multidiagonal.hpp
 create mode 100644 src/TNL/Matrices/MultidiagonalMatrixRowView.h
 create mode 100644 src/TNL/Matrices/MultidiagonalMatrixRowView.hpp
 create mode 100644 src/TNL/Matrices/MultidiagonalMatrixView.h
 create mode 100644 src/TNL/Matrices/MultidiagonalMatrixView.hpp
 create mode 100644 src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h

diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h
new file mode 100644
index 000000000..5d23cd960
--- /dev/null
+++ b/src/TNL/Matrices/Multidiagonal.h
@@ -0,0 +1,217 @@
+/***************************************************************************
+                          Multidiagonal.h  -  description
+                             -------------------
+    begin                : Oct 13, 2011
+    copyright            : (C) 2011 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/Matrix.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Matrices/MultidiagonalMatrixRowView.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Matrices/details/MultidiagonalMatrixIndexer.h>
+#include <TNL/Matrices/MultidiagonalMatrixView.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real = double,
+          typename Device = Devices::Host,
+          typename Index = int,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >,
+          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > >
+class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator >
+{
+   public:
+      using RealType = Real;
+      using DeviceType = Device;
+      using IndexType = Index;
+      using RealAllocatorType = RealAllocator;
+      using IndexAllocatorType = IndexAllocator;
+      using BaseType = Matrix< Real, Device, Index, RealAllocator >;
+      using ValuesType = typename BaseType::ValuesVector;
+      using ValuesViewType = typename ValuesType::ViewType;
+      using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, RowMajorOrder >;
+      using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType >;
+      using ViewType = MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >;
+      using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
+
+      using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
+      using DiagonalsShiftsView = typename DiagonalsShiftsType::ViewType;
+      using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >;
+      using HostDiagonalsShiftsView = typename HostDiagonalsShiftsType::ViewType;
+
+
+      // TODO: remove this - it is here only for compatibility with original matrix implementation
+      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
+      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
+      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
+
+      template< typename _Real = Real,
+                typename _Device = Device,
+                typename _Index = Index >
+      using Self = Multidiagonal< _Real, _Device, _Index >;
+
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; };
+
+      Multidiagonal();
+
+      Multidiagonal( const IndexType rows,
+                     const IndexType columns );
+
+      template< typename Vector >
+      Multidiagonal( const IndexType rows,
+                     const IndexType columns,
+                     const Vector& diagonalsShifts );
+
+      ViewType getView() const; // TODO: remove const
+
+      //ConstViewType getConstView() const;
+
+      static String getSerializationType();
+
+      virtual String getSerializationTypeVirtual() const;
+
+      template< typename Vector >
+      void setDimensions( const IndexType rows,
+                          const IndexType columns,
+                          const Vector&  diagonalsShifts );
+
+      //template< typename Vector >
+      void setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowCapacities );
+
+      const IndexType& getDiagonalsCount() const;
+
+      const DiagonalsShiftsType& getDiagonalsShifts() const;
+
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
+
+      IndexType getNonemptyRowsCount() const;
+
+      [[deprecated]]
+      IndexType getRowLength( const IndexType row ) const;
+
+      IndexType getMaxRowLength() const;
+
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      void setLike( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m );
+
+      IndexType getNumberOfNonzeroMatrixElements() const;
+
+      void reset();
+
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      bool operator == ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const;
+
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      bool operator != ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const;
+
+      RowView getRow( const IndexType& rowIdx );
+
+      const RowView getRow( const IndexType& rowIdx ) const;
+
+      void setValue( const RealType& v );
+
+      bool setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
+
+      bool addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator = 1.0 );
+
+      RealType getElement( const IndexType row,
+                           const IndexType column ) const;
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
+
+      template< typename Function >
+      void forAllRows( Function& function ) const;
+
+      template< typename Function >
+      void forAllRows( Function& function );
+
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
+
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector ) const;
+
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      void addMatrix( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix,
+                      const RealType& matrixMultiplicator = 1.0,
+                      const RealType& thisMatrixMultiplicator = 1.0 );
+
+      template< typename Real2, typename Index2 >
+      void getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix,
+                             const RealType& matrixMultiplicator = 1.0 );
+
+      template< typename Vector1, typename Vector2 >
+      __cuda_callable__
+      void performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
+
+      // copy assignment
+      Multidiagonal& operator=( const Multidiagonal& matrix );
+
+      // cross-device copy assignment
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      Multidiagonal& operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix );
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+      void save( const String& fileName ) const;
+
+      void load( const String& fileName );
+
+      void print( std::ostream& str ) const;
+
+      const IndexerType& getIndexer() const;
+
+      IndexerType& getIndexer();
+
+   protected:
+
+      __cuda_callable__
+      IndexType getElementIndex( const IndexType row,
+                                 const IndexType localIdx ) const;
+
+      DiagonalsShiftsType diagonalsShifts;
+
+      HostDiagonalsShiftsType hostDiagonalsShifts;
+
+      IndexerType indexer;
+
+      ViewType view;
+};
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/Multidiagonal.hpp>
diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp
new file mode 100644
index 000000000..95f6667c1
--- /dev/null
+++ b/src/TNL/Matrices/Multidiagonal.hpp
@@ -0,0 +1,909 @@
+/***************************************************************************
+                          Multidiagonal.hpp  -  description
+                             -------------------
+    begin                : Oct 13, 2011
+    copyright            : (C) 2011 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <sstream>
+#include <TNL/Assert.h>
+#include <TNL/Matrices/Multidiagonal.h>
+#include <TNL/Exceptions/NotImplementedError.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Device >
+class MultidiagonalDeviceDependentCode;
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+Multidiagonal()
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Vector >
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+Multidiagonal( const IndexType rows,
+               const IndexType columns,
+               const Vector& diagonalsShifts )
+{
+   this->setDimensions( rows, columns, diagonalsShifts );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+auto
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getView() const -> ViewType
+{
+   // TODO: fix when getConstView works
+   return ViewType( const_cast< Multidiagonal* >( this )->values.getView(),
+                    const_cast< Multidiagonal* >( this )->diagonalsShifts.getView(),
+                    indexer );
+}
+
+/*template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+auto
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getConstView() const -> ConstViewType
+{
+   return ConstViewType( this->values.getConstView(), indexer );
+}*/
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+String
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getSerializationType()
+{
+   return String( "Matrices::Multidiagonal< " ) +
+          TNL::getSerializationType< RealType >() + ", [any_device], " +
+          TNL::getSerializationType< IndexType >() + ", " +
+          ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator], [any_allocator] >";
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+String
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getSerializationTypeVirtual() const
+{
+   return this->getSerializationType();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Vector >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+setDimensions( const IndexType rows,
+               const IndexType columns,
+               const Vector& diagonalsShifts )
+{
+   Matrix< Real, Device, Index >::setDimensions( rows, columns );
+   this->diagonalsShifts = diagonalsShifts;
+   this->hostDiagonalsShifts = diagonalsShifts;
+   const IndexType minShift = min( diagonalsShifts );
+   IndexType nonemptyRows = min( rows, columns );
+   if( rows > columns && minShift < 0 )
+      nonemptyRows = min( rows, nonemptyRows - minShift );
+   this->indexer.set( rows, columns, diagonalsShifts.getSize(), nonemptyRows );
+   this->values.setSize( this->indexer.getStorageSize() );
+   this->values = 0.0;
+   this->view = this->getView();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+ //  template< typename Vector >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowLengths )
+{
+   if( max( rowLengths ) > 3 )
+      throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   if( rowLengths.getElement( 0 ) > 2 )
+      throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
+   if( this->getRows() > this->getColumns() )
+      if( rowLengths.getElement( this->getRows()-1 ) > 1 )
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   if( this->getRows() == this->getColumns() )
+      if( rowLengths.getElement( this->getRows()-1 ) > 2 )
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   if( this->getRows() < this->getColumns() )
+      if( rowLengths.getElement( this->getRows()-1 ) > 3 )
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+const Index&
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getDiagonalsCount() const
+{
+   return this->view.getDiagonalsCount();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+auto
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getDiagonalsShifts() const -> const DiagonalsShiftsType&
+{
+   return this->diagonalsShifts;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Vector >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getCompressedRowLengths( Vector& rowLengths ) const
+{
+   return this->view.getCompressedRowLengths( rowLengths );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+Index
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getNonemptyRowsCount() const
+{
+   return this->indexer.getNonemptyRowsCount();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+Index
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getRowLength( const IndexType row ) const
+{
+   return this->view.getRowLength( row );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+Index
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getMaxRowLength() const
+{
+   return this->view.getMaxRowLength();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+setLike( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m )
+{
+   this->setDimensions( m.getRows(), m.getColumns(), m.getDiagonalsShifts() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+Index
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getNumberOfNonzeroMatrixElements() const
+{
+   return this->view.getNumberOfNonzeroMatrixElements();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+reset()
+{
+   Matrix< Real, Device, Index >::reset();
+   this->values.reset();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+bool
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+operator == ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const
+{
+   if( RowMajorOrder == RowMajorOrder_ )
+      return this->values == matrix.values;
+   else
+   {
+      TNL_ASSERT( false, "TODO" );
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+bool
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+operator != ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const
+{
+   return ! this->operator==( matrix );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+setValue( const RealType& v )
+{
+   this->view.setValue( v );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+__cuda_callable__
+auto
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getRow( const IndexType& rowIdx ) const -> const RowView
+{
+   return this->view.getRow( rowIdx );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+__cuda_callable__
+auto
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getRow( const IndexType& rowIdx ) -> RowView
+{
+   return this->view.getRow( rowIdx );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+bool
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+setElement( const IndexType row, const IndexType column, const RealType& value )
+{
+   return this->view.setElement( row, column, value );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+bool
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+addElement( const IndexType row,
+            const IndexType column,
+            const RealType& value,
+            const RealType& thisElementMultiplicator )
+{
+   return this->view.addElement( row, column, value, thisElementMultiplicator );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+Real
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getElement( const IndexType row, const IndexType column ) const
+{
+   return this->view.getElement( row, column );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+forRows( IndexType first, IndexType last, Function& function ) const
+{
+   this->view.forRows( first, last, function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+  template< typename Function >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+forRows( IndexType first, IndexType last, Function& function )
+{
+   this->view.forRows( first, last, function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+forAllRows( Function& function ) const
+{
+   this->view.forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+forAllRows( Function& function )
+{
+   this->view.forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+template< typename Vector >
+__cuda_callable__
+typename Vector::RealType 
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+rowVectorProduct( const IndexType row, const Vector& vector ) const
+{
+   return this->view.rowVectorProduct();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename InVector,
+             typename OutVector >
+void 
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+vectorProduct( const InVector& inVector, OutVector& outVector ) const
+{
+   this->view.vectorProduct( inVector, outVector );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+addMatrix( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix,
+           const RealType& matrixMultiplicator,
+           const RealType& thisMatrixMultiplicator )
+{
+   this->view.addMatrix( matrix.getView(), matrixMultiplicator, thisMatrixMultiplicator );
+}
+
+#ifdef HAVE_CUDA
+template< typename Real,
+          typename Real2,
+          typename Index,
+          typename Index2 >
+__global__ void MultidiagonalTranspositionCudaKernel( const Multidiagonal< Real2, Devices::Cuda, Index2 >* inMatrix,
+                                                             Multidiagonal< Real, Devices::Cuda, Index >* outMatrix,
+                                                             const Real matrixMultiplicator,
+                                                             const Index gridIdx )
+{
+   const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   if( rowIdx < inMatrix->getRows() )
+   {
+      if( rowIdx > 0 )
+        outMatrix->setElementFast( rowIdx-1,
+                                   rowIdx,
+                                   matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx-1 ) );
+      outMatrix->setElementFast( rowIdx,
+                                 rowIdx,
+                                 matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx ) );
+      if( rowIdx < inMatrix->getRows()-1 )
+         outMatrix->setElementFast( rowIdx+1,
+                                    rowIdx,
+                                    matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) );
+   }
+}
+#endif
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Real2, typename Index2 >
+void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix,
+                                                                    const RealType& matrixMultiplicator )
+{
+   TNL_ASSERT( this->getRows() == matrix.getRows(),
+               std::cerr << "This matrix rows: " << this->getRows() << std::endl
+                    << "That matrix rows: " << matrix.getRows() << std::endl );
+   if( std::is_same< Device, Devices::Host >::value )
+   {
+      const IndexType& rows = matrix.getRows();
+      for( IndexType i = 1; i < rows; i++ )
+      {
+         RealType aux = matrix. getElement( i, i - 1 );
+         this->setElement( i, i - 1, matrix.getElement( i - 1, i ) );
+         this->setElement( i, i, matrix.getElement( i, i ) );
+         this->setElement( i - 1, i, aux );
+      }
+   }
+   if( std::is_same< Device, Devices::Cuda >::value )
+   {
+#ifdef HAVE_CUDA
+      Multidiagonal* kernel_this = Cuda::passToDevice( *this );
+      typedef  Multidiagonal< Real2, Device, Index2 > InMatrixType;
+      InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix );
+      dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
+      const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
+      const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
+      for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
+      {
+         if( gridIdx == cudaGrids - 1 )
+            cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
+         MultidiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>>
+                                                    ( kernel_inMatrix,
+                                                      kernel_this,
+                                                      matrixMultiplicator,
+                                                      gridIdx );
+      }
+      Cuda::freeFromDevice( kernel_this );
+      Cuda::freeFromDevice( kernel_inMatrix );
+      TNL_CHECK_CUDA_DEVICE;
+#endif
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Vector1, typename Vector2 >
+__cuda_callable__
+void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::performSORIteration( const Vector1& b,
+                                                              const IndexType row,
+                                                              Vector2& x,
+                                                              const RealType& omega ) const
+{
+   RealType sum( 0.0 );
+   if( row > 0 )
+      sum += this->getElementFast( row, row - 1 ) * x[ row - 1 ];
+   if( row < this->getColumns() - 1 )
+      sum += this->getElementFast( row, row + 1 ) * x[ row + 1 ];
+   x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / this->getElementFast( row, row ) * ( b[ row ] - sum );
+}
+
+
+// copy assignment
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >&
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::operator=( const Multidiagonal& matrix )
+{
+   this->setLike( matrix );
+   this->values = matrix.values;
+   return *this;
+}
+
+// cross-device copy assignment
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >&
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix )
+{
+   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
+                  "unknown device" );
+   static_assert( std::is_same< Device_, Devices::Host >::value || std::is_same< Device_, Devices::Cuda >::value,
+                  "unknown device" );
+
+   this->setLike( matrix );
+   if( RowMajorOrder == RowMajorOrder_ )
+      this->values = matrix.getValues();
+   else
+   {
+      if( std::is_same< Device, Device_ >::value )
+      {
+         const auto matrix_view = matrix.getView();
+         auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+            value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+         };
+         this->forAllRows( f );
+      }
+      else
+      {
+         Multidiagonal< Real, Device, Index, RowMajorOrder_ > auxMatrix;
+         auxMatrix = matrix;
+         const auto matrix_view = auxMatrix.getView();
+         auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+            value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+         };
+         this->forAllRows( f );
+      }
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::save( File& file ) const
+{
+   Matrix< Real, Device, Index >::save( file );
+   file << diagonalsShifts;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::load( File& file )
+{
+   Matrix< Real, Device, Index >::load( file );
+   file >> this->diagonalsShifts;
+   this->hostDiagonalsShifts = this->diagonalsShifts;
+   const IndexType minShift = min( diagonalsShifts );
+   IndexType nonemptyRows = min( this->getRows(), this->getColumns() );
+   if( this->getRows() > this->getColumns() && minShift < 0 )
+      nonemptyRows = min( this->getRows(), nonemptyRows - minShift );
+   this->indexer.set( this->getRows(), this->getColumns(), diagonalsShifts.getSize(), nonemptyRows );
+   this->view = this->getView();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::save( const String& fileName ) const
+{
+   Object::save( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::load( const String& fileName )
+{
+   Object::load( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+print( std::ostream& str ) const
+{
+   for( IndexType row = 0; row < this->getRows(); row++ )
+   {
+      str <<"Row: " << row << " -> ";
+      for( IndexType column = row - 1; column < row + 2; column++ )
+         if( column >= 0 && column < this->columns )
+            str << " Col:" << column << "->" << this->getElement( row, column ) << "\t";
+      str << std::endl;
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+auto
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getIndexer() const -> const IndexerType&
+{
+   return this->indexer;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+auto
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getIndexer() -> IndexerType&
+{
+   return this->indexer;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+__cuda_callable__
+Index Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getElementIndex( const IndexType row, const IndexType column ) const
+{
+   IndexType localIdx = column - row;
+   if( row > 0 )
+      localIdx++;
+
+   TNL_ASSERT_GE( localIdx, 0, "" );
+   TNL_ASSERT_LT( localIdx, 3, "" );
+
+   return this->indexer.getGlobalIndex( row, localIdx );
+}
+
+/*
+template<>
+class MultidiagonalDeviceDependentCode< Devices::Host >
+{
+   public:
+
+      typedef Devices::Host Device;
+
+      template< typename Index >
+      __cuda_callable__
+      static Index getElementIndex( const Index rows,
+                                    const Index row,
+                                    const Index column )
+      {
+         return 2*row + column;
+      }
+
+      template< typename Vector,
+                typename Index,
+                typename ValuesType  >
+      __cuda_callable__
+      static typename Vector::RealType rowVectorProduct( const Index rows,
+                                                         const ValuesType& values,
+                                                         const Index row,
+                                                         const Vector& vector )
+      {
+         if( row == 0 )
+            return vector[ 0 ] * values[ 0 ] +
+                   vector[ 1 ] * values[ 1 ];
+         Index i = 3 * row;
+         if( row == rows - 1 )
+            return vector[ row - 1 ] * values[ i - 1 ] +
+                   vector[ row ] * values[ i ];
+         return vector[ row - 1 ] * values[ i - 1 ] +
+                vector[ row ] * values[ i ] +
+                vector[ row + 1 ] * values[ i + 1 ];
+      }
+
+      template< typename Real,
+                typename Index,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+#ifdef HAVE_OPENMP
+#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
+#endif
+         for( Index row = 0; row < matrix.getRows(); row ++ )
+            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
+      }
+};
+
+template<>
+class MultidiagonalDeviceDependentCode< Devices::Cuda >
+{
+   public:
+
+      typedef Devices::Cuda Device;
+
+      template< typename Index >
+      __cuda_callable__
+      static Index getElementIndex( const Index rows,
+                                    const Index row,
+                                    const Index column )
+      {
+         return ( column - row + 1 )*rows + row - 1;
+      }
+
+      template< typename Vector,
+                typename Index,
+                typename ValuesType >
+      __cuda_callable__
+      static typename Vector::RealType rowVectorProduct( const Index rows,
+                                                         const ValuesType& values,
+                                                         const Index row,
+                                                         const Vector& vector )
+      {
+         if( row == 0 )
+            return vector[ 0 ] * values[ 0 ] +
+                   vector[ 1 ] * values[ rows - 1 ];
+         Index i = row - 1;
+         if( row == rows - 1 )
+            return vector[ row - 1 ] * values[ i ] +
+                   vector[ row ] * values[ i + rows ];
+         return vector[ row - 1 ] * values[ i ] +
+                vector[ row ] * values[ i + rows ] +
+                vector[ row + 1 ] * values[ i + 2*rows ];
+      }
+
+      template< typename Real,
+                typename Index,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+         MatrixVectorProductCuda( matrix, inVector, outVector );
+      }
+};
+ */
+
+} // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.h b/src/TNL/Matrices/MultidiagonalMatrixRowView.h
new file mode 100644
index 000000000..68b5be55c
--- /dev/null
+++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.h
@@ -0,0 +1,59 @@
+/***************************************************************************
+                          MultidiagonalMatrixRowView.h  -  description
+                             -------------------
+    begin                : Jan 11, 2020
+    copyright            : (C) 2020 by oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+namespace Matrices {   
+
+template< typename ValuesView,
+          typename Indexer >
+class MultidiagonalMatrixRowView
+{
+   public:
+
+      using RealType = typename ValuesView::RealType;
+      using IndexType = typename ValuesView::IndexType;
+      using ValuesViewType = ValuesView;
+      using IndexerType = Indexer;
+
+      __cuda_callable__
+      MultidiagonalMatrixRowView( const IndexType rowIdx,
+                                const ValuesViewType& values,
+                                const IndexerType& indexer );
+
+      __cuda_callable__
+      IndexType getSize() const;
+
+      __cuda_callable__
+      const IndexType getColumnIndex( const IndexType localIdx ) const;
+
+      __cuda_callable__
+      const RealType& getValue( const IndexType localIdx ) const;
+
+      __cuda_callable__
+      RealType& getValue( const IndexType localIdx );
+
+      __cuda_callable__
+      void setElement( const IndexType localIdx,
+                       const RealType& value );
+   protected:
+
+      IndexType rowIdx;
+
+      ValuesViewType values;
+
+      Indexer indexer;
+};
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/MultidiagonalMatrixRowView.hpp>
diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp
new file mode 100644
index 000000000..349fbe8ea
--- /dev/null
+++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp
@@ -0,0 +1,75 @@
+/***************************************************************************
+                          MultidiagonalMatrixRowView.hpp  -  description
+                             -------------------
+    begin                : Jan 11, 2020
+    copyright            : (C) 2020 by oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+namespace Matrices {   
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+MultidiagonalMatrixRowView< ValuesView, Indexer >::
+MultidiagonalMatrixRowView( const IndexType rowIdx,
+                          const ValuesViewType& values,
+                          const IndexerType& indexer )
+: rowIdx( rowIdx ), values( values ), indexer( indexer )
+{
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+MultidiagonalMatrixRowView< ValuesView, Indexer >::
+getSize() const -> IndexType
+{
+   return indexer.getRowSize();
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+MultidiagonalMatrixRowView< ValuesView, Indexer >::
+getColumnIndex( const IndexType localIdx ) const -> const IndexType
+{
+   TNL_ASSERT_GE( localIdx, 0, "" );
+   TNL_ASSERT_LT( localIdx, 3, "" );
+   return rowIdx + localIdx - 1;
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+MultidiagonalMatrixRowView< ValuesView, Indexer >::
+getValue( const IndexType localIdx ) const -> const RealType&
+{
+   return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ];
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+MultidiagonalMatrixRowView< ValuesView, Indexer >::
+getValue( const IndexType localIdx ) -> RealType&
+{
+   return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ];
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+void 
+MultidiagonalMatrixRowView< ValuesView, Indexer >::
+setElement( const IndexType localIdx,
+            const RealType& value )
+{
+   this->values[ indexer.getGlobalIndex( rowIdx, localIdx ) ] = value;
+}
+
+} // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.h b/src/TNL/Matrices/MultidiagonalMatrixView.h
new file mode 100644
index 000000000..addeb18b3
--- /dev/null
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.h
@@ -0,0 +1,181 @@
+/***************************************************************************
+                          MultidiagonalMatrixView.h  -  description
+                             -------------------
+    begin                : Jan 11, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/MatrixView.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Matrices/MultidiagonalMatrixRowView.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Matrices/details/MultidiagonalMatrixIndexer.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real = double,
+          typename Device = Devices::Host,
+          typename Index = int,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value >
+class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
+{
+   public:
+      using RealType = Real;
+      using DeviceType = Device;
+      using IndexType = Index;
+      using BaseType = MatrixView< Real, Device, Index >;
+      using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType >;
+      using DiagonalsShiftsView = typename DiagonalsShiftsType::ViewType;
+      using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >;
+      using HostDiagonalsShiftsView = typename DiagonalsShiftsType::ViewType;
+      using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, RowMajorOrder >;
+      using ValuesViewType = typename BaseType::ValuesView;
+      using ViewType = MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >;
+      using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
+      using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType >;
+
+      // TODO: remove this - it is here only for compatibility with original matrix implementation
+      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
+      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
+      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
+
+      template< typename _Real = Real,
+                typename _Device = Device,
+                typename _Index = Index,
+                bool RowMajorOrder_ = std::is_same< Device, Devices::Host >::value >
+      using Self = MultidiagonalMatrixView< _Real, _Device, _Index, RowMajorOrder_ >;
+
+      MultidiagonalMatrixView();
+
+      MultidiagonalMatrixView( const ValuesViewType& values,
+                               const DiagonalsShiftsView& diagonalsShifts,
+                               const IndexerType& indexer );
+
+      ViewType getView();
+
+      ConstViewType getConstView() const;
+
+      static String getSerializationType();
+
+      virtual String getSerializationTypeVirtual() const;
+
+      __cuda_callable__
+      const IndexType& getDiagonalsCount() const;
+
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
+
+      IndexType getNonemptyRowsCount() const;
+
+      [[deprecated]]
+      IndexType getRowLength( const IndexType row ) const;
+
+      IndexType getMaxRowLength() const;
+
+      IndexType getNumberOfNonzeroMatrixElements() const;
+
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      bool operator == ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;
+
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      bool operator != ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;
+
+      RowView getRow( const IndexType& rowIdx );
+
+      const RowView getRow( const IndexType& rowIdx ) const;
+
+      void setValue( const RealType& v );
+
+      bool setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
+
+      bool addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator = 1.0 );
+
+      RealType getElement( const IndexType row,
+                           const IndexType column ) const;
+
+      MultidiagonalMatrixView& operator=( const MultidiagonalMatrixView& view );
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
+
+      template< typename Function >
+      void forAllRows( Function& function ) const;
+
+      template< typename Function >
+      void forAllRows( Function& function );
+
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
+
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector ) const;
+
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      void addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix,
+                      const RealType& matrixMultiplicator = 1.0,
+                      const RealType& thisMatrixMultiplicator = 1.0 );
+
+      template< typename Real2, typename Index2 >
+      void getTransposition( const MultidiagonalMatrixView< Real2, Device, Index2 >& matrix,
+                             const RealType& matrixMultiplicator = 1.0 );
+
+      template< typename Vector1, typename Vector2 >
+      __cuda_callable__
+      void performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
+
+      void save( File& file ) const;
+
+      void save( const String& fileName ) const;
+
+      void print( std::ostream& str ) const;
+
+      __cuda_callable__
+      const IndexerType& getIndexer() const;
+
+      __cuda_callable__
+      IndexerType& getIndexer();
+
+   protected:
+
+      __cuda_callable__
+      IndexType getElementIndex( const IndexType row,
+                                 const IndexType localIdx ) const;
+
+      DiagonalsShiftsView diagonalsShifts;
+
+      HostDiagonalsShiftsView hostDiagonalsShifts;
+
+      IndexerType indexer;
+};
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/MultidiagonalMatrixView.hpp>
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
new file mode 100644
index 000000000..3d9b0237f
--- /dev/null
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
@@ -0,0 +1,729 @@
+/***************************************************************************
+                          MultidiagonalMatrixView.hpp  -  description
+                             -------------------
+    begin                : Jan 11, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Assert.h>
+#include <TNL/Matrices/MultidiagonalMatrixView.h>
+#include <TNL/Exceptions/NotImplementedError.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+MultidiagonalMatrixView()
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+MultidiagonalMatrixView( const ValuesViewType& values,
+                         const DiagonalsShiftsView& diagonalsShifts,
+                         const IndexerType& indexer )
+: MatrixView< Real, Device, Index >( indexer.getRows(), indexer.getColumns(), values ),
+  diagonalsShifts( diagonalsShifts ),
+  indexer( indexer )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+auto
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getView() -> ViewType
+{
+   return ViewType( this->values.getView(), indexer );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+auto
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getConstView() const -> ConstViewType
+{
+   return ConstViewType( this->values.getConstView(), indexer );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+String
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getSerializationType()
+{
+   return String( "Matrices::Multidiagonal< " ) +
+          TNL::getSerializationType< RealType >() + ", [any_device], " +
+          TNL::getSerializationType< IndexType >() + ", " +
+          ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >";
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+String
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getSerializationTypeVirtual() const
+{
+   return this->getSerializationType();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+const Index&
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getDiagonalsCount() const
+{
+   return this->diagonalsShifts.getSize();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Vector >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getCompressedRowLengths( Vector& rowLengths ) const
+{
+   rowLengths.setSize( this->getRows() );
+   rowLengths = 0;
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   this->allRowsReduction( fetch, reduce, keep, 0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Index
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getNonemptyRowsCount() const
+{
+   return this->indexer.getNonemptyRowsCount();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Index
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getRowLength( const IndexType row ) const
+{
+   return this->diagonalsShifts.getSize();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Index
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getMaxRowLength() const
+{
+   return this->diagonalsShifts.getSize();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Index
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getNumberOfNonzeroMatrixElements() const
+{
+   const auto values_view = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+      return ( values_view[ i ] != 0.0 );
+   };
+   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+bool
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+operator == ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const
+{
+   if( RowMajorOrder == RowMajorOrder_ )
+      return this->values == matrix.values;
+   else
+   {
+      TNL_ASSERT( false, "TODO" );
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+bool
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+operator != ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const
+{
+   return ! this->operator==( matrix );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+setValue( const RealType& v )
+{
+   this->values = v;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getRow( const IndexType& rowIdx ) const -> const RowView
+{
+   return RowView( rowIdx, this->values.getView(), this->indexer );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getRow( const IndexType& rowIdx ) -> RowView
+{
+   return RowView( rowIdx, this->values.getView(), this->indexer );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+bool
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+setElement( const IndexType row, const IndexType column, const RealType& value )
+{
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+   if( abs( row - column ) > 1 )
+   {
+      std::stringstream msg;
+      msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in tridiagonal matrix.";
+      throw std::logic_error( msg.str() );
+   }
+   this->values.setElement( this->getElementIndex( row, column ), value );
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+bool
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+addElement( const IndexType row,
+            const IndexType column,
+            const RealType& value,
+            const RealType& thisElementMultiplicator )
+{
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+   if( abs( row - column ) > 1 )
+   {
+      std::stringstream msg;
+      msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in tridiagonal matrix.";
+      throw std::logic_error( msg.str() );
+   }
+   const Index i = this->getElementIndex( row, column );
+   this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value );
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Real
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getElement( const IndexType row, const IndexType column ) const
+{
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+
+   if( abs( column - row ) > 1 )
+      return 0.0;
+   return this->values.getElement( this->getElementIndex( row, column ) );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >&
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+operator=( const MultidiagonalMatrixView& view )
+{
+   MatrixView< Real, Device, Index >::operator=( view );
+   this->diagonalsShifts.copy( view.diagonalsShifts );
+   this->hostDiagonalsShifts.copy( view.hostDiagonalsShifts );
+   this->indexer = view.indexer;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ ) const
+{
+   using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) );
+   const auto values_view = this->values.getConstView();
+   const auto indexer = this->indexer;
+   const auto zero = zero_;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      Real_ sum( zero );
+      if( rowIdx == 0 )
+      {
+         reduce( sum, fetch( 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ) );
+         reduce( sum, fetch( 0, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ) );
+         keep( 0, sum );
+         return;
+      }
+      if( rowIdx + 1 < indexer.getColumns() )
+      {
+         reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
+         reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
+         reduce( sum, fetch( rowIdx, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ) );
+         keep( rowIdx, sum );
+         return;
+      }
+      if( rowIdx < indexer.getColumns() )
+      {
+         reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
+         reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
+         keep( rowIdx, sum );
+      }
+      else
+      {
+         keep( rowIdx, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   this->rowsReduction( 0, this->indexer.getNonEmptyRowsCount(), fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forRows( IndexType first, IndexType last, Function& function ) const
+{
+   const auto values_view = this->values.getConstView();
+   const auto indexer = this->indexer;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      if( rowIdx == 0 )
+      {
+         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] );
+         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] );
+      } 
+      else if( rowIdx + 1 < indexer.getColumns() )
+      {
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
+         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] );
+      }
+      else if( rowIdx < indexer.getColumns() )
+      {
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
+      }
+      else
+         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+  template< typename Function >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forRows( IndexType first, IndexType last, Function& function )
+{
+   auto values_view = this->values.getView();
+   const auto indexer = this->indexer;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      if( rowIdx == 0 )
+      {
+         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] );
+         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] );
+      } 
+      else if( rowIdx + 1 < indexer.getColumns() )
+      {
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
+         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] );
+      }
+      else if( rowIdx < indexer.getColumns() )
+      {
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
+      }
+      else
+         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forAllRows( Function& function ) const
+{
+   this->forRows( 0, this->indxer.getNonEmptyRowsCount(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forAllRows( Function& function )
+{
+   this->forRows( 0, this->indexer.getNonEmptyRowsCount(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+template< typename Vector >
+__cuda_callable__
+typename Vector::RealType 
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+rowVectorProduct( const IndexType row, const Vector& vector ) const
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename InVector,
+             typename OutVector >
+void 
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+vectorProduct( const InVector& inVector, OutVector& outVector ) const
+{
+   TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." );
+   TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." );
+
+   const auto inVectorView = inVector.getConstView();
+   auto outVectorView = outVector.getView();
+   const auto valuesView = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType& row, const IndexType& column, const RealType& value ) -> RealType {
+      return value * inVectorView[ column ];
+   };
+   auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+      sum += value;
+   };
+   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      outVectorView[ row ] = value;
+   };
+   this->allRowsReduction( fetch, reduction, keeper, ( RealType ) 0.0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix,
+           const RealType& matrixMultiplicator,
+           const RealType& thisMatrixMultiplicator )
+{
+   TNL_ASSERT_EQ( this->getRows(), matrix.getRows(), "Matrices rows are not equal." );
+   TNL_ASSERT_EQ( this->getColumns(), matrix.getColumns(), "Matrices columns are not equal." );
+
+   if( RowMajorOrder == RowMajorOrder_ )
+   {
+      if( thisMatrixMultiplicator == 1.0 )
+         this->values += matrixMultiplicator * matrix.getValues();
+      else
+         this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.getValues();
+   }
+   else
+   {
+      const auto matrix_view = matrix;
+      const auto matrixMult = matrixMultiplicator;
+      const auto thisMult = thisMatrixMultiplicator;
+      auto add0 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+         value = matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+      };
+      auto add1 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+         value += matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+      };
+      auto addGen = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+         value = thisMult * value + matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+      };
+      if( thisMult == 0.0 )
+         this->forAllRows( add0 );
+      else if( thisMult == 1.0 )
+         this->forAllRows( add1 );
+      else
+         this->forAllRows( addGen );
+   }
+}
+
+#ifdef HAVE_CUDA
+/*template< typename Real,
+          typename Real2,
+          typename Index,
+          typename Index2 >
+__global__ void MultidiagonalTranspositionCudaKernel( const Multidiagonal< Real2, Devices::Cuda, Index2 >* inMatrix,
+                                                             Multidiagonal< Real, Devices::Cuda, Index >* outMatrix,
+                                                             const Real matrixMultiplicator,
+                                                             const Index gridIdx )
+{
+   const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   if( rowIdx < inMatrix->getRows() )
+   {
+      if( rowIdx > 0 )
+        outMatrix->setElementFast( rowIdx-1,
+                                   rowIdx,
+                                   matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx-1 ) );
+      outMatrix->setElementFast( rowIdx,
+                                 rowIdx,
+                                 matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx ) );
+      if( rowIdx < inMatrix->getRows()-1 )
+         outMatrix->setElementFast( rowIdx+1,
+                                    rowIdx,
+                                    matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) );
+   }
+}*/
+#endif
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real2, typename Index2 >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getTransposition( const MultidiagonalMatrixView< Real2, Device, Index2 >& matrix,
+                  const RealType& matrixMultiplicator )
+{
+   TNL_ASSERT( this->getRows() == matrix.getRows(),
+               std::cerr << "This matrix rows: " << this->getRows() << std::endl
+                    << "That matrix rows: " << matrix.getRows() << std::endl );
+   if( std::is_same< Device, Devices::Host >::value )
+   {
+      const IndexType& rows = matrix.getRows();
+      for( IndexType i = 1; i < rows; i++ )
+      {
+         RealType aux = matrix. getElement( i, i - 1 );
+         this->setElement( i, i - 1, matrix.getElement( i - 1, i ) );
+         this->setElement( i, i, matrix.getElement( i, i ) );
+         this->setElement( i - 1, i, aux );
+      }
+   }
+   if( std::is_same< Device, Devices::Cuda >::value )
+   {
+#ifdef HAVE_CUDA
+      /*Multidiagonal* kernel_this = Cuda::passToDevice( *this );
+      typedef  Multidiagonal< Real2, Device, Index2 > InMatrixType;
+      InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix );
+      dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
+      const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
+      const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
+      for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
+      {
+         if( gridIdx == cudaGrids - 1 )
+            cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
+         MultidiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>>
+                                                    ( kernel_inMatrix,
+                                                      kernel_this,
+                                                      matrixMultiplicator,
+                                                      gridIdx );
+      }
+      Cuda::freeFromDevice( kernel_this );
+      Cuda::freeFromDevice( kernel_inMatrix );
+      TNL_CHECK_CUDA_DEVICE;*/
+#endif
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Vector1, typename Vector2 >
+__cuda_callable__
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+performSORIteration( const Vector1& b,
+                     const IndexType row,
+                     Vector2& x,
+                     const RealType& omega ) const
+{
+   RealType sum( 0.0 );
+   if( row > 0 )
+      sum += this->getElementFast( row, row - 1 ) * x[ row - 1 ];
+   if( row < this->getColumns() - 1 )
+      sum += this->getElementFast( row, row + 1 ) * x[ row + 1 ];
+   x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / this->getElementFast( row, row ) * ( b[ row ] - sum );
+}
+
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const
+{
+   MatrixView< Real, Device, Index >::save( file );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+save( const String& fileName ) const
+{
+   Object::save( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const
+{
+   for( IndexType row = 0; row < this->getRows(); row++ )
+   {
+      str <<"Row: " << row << " -> ";
+      for( IndexType column = row - 1; column < row + 2; column++ )
+         if( column >= 0 && column < this->columns )
+            str << " Col:" << column << "->" << this->getElement( row, column ) << "\t";
+      str << std::endl;
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getIndexer() const -> const IndexerType&
+{
+   return this->indexer;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getIndexer() -> IndexerType&
+{
+   return this->indexer;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+Index MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getElementIndex( const IndexType row, const IndexType column ) const
+{
+   IndexType localIdx = column - row;
+   if( row > 0 )
+      localIdx++;
+
+   TNL_ASSERT_GE( localIdx, 0, "" );
+   TNL_ASSERT_LT( localIdx, 3, "" );
+
+   return this->indexer.getGlobalIndex( row, localIdx );
+}
+
+} // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h
index 290062793..128b48494 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.h
+++ b/src/TNL/Matrices/TridiagonalMatrixView.h
@@ -59,9 +59,6 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
 
       virtual String getSerializationTypeVirtual() const;
 
-      void setDimensions( const IndexType rows,
-                          const IndexType columns );
-
       template< typename Vector >
       void getCompressedRowLengths( Vector& rowLengths ) const;
 
diff --git a/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h b/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h
new file mode 100644
index 000000000..0f0436d74
--- /dev/null
+++ b/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h
@@ -0,0 +1,106 @@
+/***************************************************************************
+                          MultidiagonalMatrixIndexer.h  -  description
+                             -------------------
+    begin                : Jan 11, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Matrices {
+      namespace details {
+
+template< typename Index,
+          bool RowMajorOrder >
+class MultidiagonalMatrixIndexer
+{
+   public:
+
+      using IndexType = Index;
+
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; };
+
+      __cuda_callable__
+      MultidiagonalMatrixIndexer()
+      : rows( 0 ), columns( 0 ), nonemptyRows( 0 ){};
+
+      __cuda_callable__
+      MultidiagonalMatrixIndexer( const IndexType& rows,
+                                  const IndexType& columns,
+                                  const IndexType& diagonals,
+                                  const IndexType& nonemptyRows )
+      : rows( rows ), 
+        columns( columns ),
+        diagonals( diagonals ),
+        nonemptyRows( nonemptyRows ) {};
+
+      __cuda_callable__
+      MultidiagonalMatrixIndexer( const MultidiagonalMatrixIndexer& indexer )
+      : rows( indexer.rows ),
+        columns( indexer.columns ),
+        diagonals( indexer.diagonals ),
+        nonemptyRows( indexer.nonemptyRows ) {};
+
+      void set( const IndexType& rows,
+                const IndexType& columns,
+                const IndexType& diagonals,
+                const IndexType& nonemptyRows )
+      {
+         this->rows = rows;
+         this->columns = columns;
+         this->diagonals = diagonals;
+         this->nonemptyRows = nonemptyRows;
+      };
+
+      /*__cuda_callable__
+      IndexType getRowSize( const IndexType rowIdx ) const
+      {
+         if( rowIdx == 0 )
+            return 2;
+         if( columns <= rows )
+         {
+            if( rowIdx == columns - 1 )
+               return 2;
+            if( rowIdx == columns )
+               return 1;
+         }
+         return 3;
+      };*/
+
+      __cuda_callable__
+      const IndexType& getRows() const { return this->rows; };
+
+      __cuda_callable__
+      const IndexType& getColumns() const { return this->columns; };
+
+      __cuda_callable__
+      const IndexType& getNonemptyRowsCount() const { return this->nonemptyRows; };
+
+      __cuda_callable__
+      IndexType getStorageSize() const { return diagonals * this->nonemptyRows; };
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index rowIdx, const Index localIdx ) const
+      {
+         TNL_ASSERT_GE( localIdx, 0, "" );
+         TNL_ASSERT_LT( localIdx, diagonals, "" );
+         TNL_ASSERT_GE( rowIdx, 0, "" );
+         TNL_ASSERT_LT( rowIdx, this->rows, "" );
+         
+         if( RowMajorOrder )
+            return diagonals * rowIdx + localIdx;
+         else
+            return localIdx * nonemptyRows + rowIdx;
+      };
+
+      protected:
+
+         IndexType rows, columns, diagonals, nonemptyRows;
+};
+      } //namespace details
+   } // namespace Materices
+} // namespace TNL
diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index 4b95380c4..287495405 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -13,8 +13,8 @@ IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
-#   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
-#   TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
+   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
    CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} )
@@ -42,9 +42,9 @@ ELSE(  BUILD_CUDA )
    TARGET_COMPILE_OPTIONS( TridiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
-#   ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cpp )
-#   TARGET_COMPILE_OPTIONS( MultidiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
-#   TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
+   ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cpp )
+   TARGET_COMPILE_OPTIONS( MultidiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
    ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cpp )
    TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR_segments PRIVATE ${CXX_TESTS_FLAGS} )
@@ -65,7 +65,7 @@ ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${C
 ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( TridiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
-#ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 
 ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest_Ellpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} )
diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp
index 73406d0df..639f19640 100644
--- a/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp
+++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp
@@ -1,7 +1,7 @@
 /***************************************************************************
                           MultidiagonalMatrixTest.cpp -  description
                              -------------------
-    begin                : Jan 9, 2020
+    begin                : Jan 8, 2020
     copyright            : (C) 2020 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu
index e3dab545c..53541edbd 100644
--- a/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu
+++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu
@@ -1,7 +1,7 @@
 /***************************************************************************
                           MultidiagonalMatrixTest.cu -  description
                              -------------------
-    begin                : Jan 9, 2020
+    begin                : Jan 8, 2020
     copyright            : (C) 2020 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
index abe6b64c5..cb9916e4c 100644
--- a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
+++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
@@ -8,6 +8,7 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
+#include <sstream>
 #include <TNL/Devices/Host.h>
 #include <TNL/Matrices/Matrix.h>
 #include <TNL/Matrices/Multidiagonal.h>
@@ -33,55 +34,110 @@ static const char* TEST_FILE_NAME = "test_MultidiagonalMatrixTest.tnl";
 
 void test_GetSerializationType()
 {
-   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator], [any_allocator] >" ) );
 }
 
 template< typename Matrix >
 void test_SetDimensions()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
 
-    const IndexType rows = 9;
-    const IndexType cols = 8;
+   const IndexType rows = 9;
+   const IndexType cols = 8;
+   const DiagonalsShiftsType diagonalsShifts{ -3, -1, 0, 2, 4 };
 
-    Matrix m;
-    m.setDimensions( rows, cols );
+   Matrix m;
+   m.setDimensions( rows, cols, diagonalsShifts );
 
-    EXPECT_EQ( m.getRows(), 9 );
-    EXPECT_EQ( m.getColumns(), 8 );
+   EXPECT_EQ( m.getRows(), 9 );
+   EXPECT_EQ( m.getColumns(), 8 );
 }
 
+
 template< typename Matrix1, typename Matrix2 >
 void test_SetLike()
 {
-    using RealType = typename Matrix1::RealType;
-    using DeviceType = typename Matrix1::DeviceType;
-    using IndexType = typename Matrix1::IndexType;
+   using RealType = typename Matrix1::RealType;
+   using DeviceType = typename Matrix1::DeviceType;
+   using IndexType = typename Matrix1::IndexType;
+   using DiagonalsShiftsType = typename Matrix1::DiagonalsShiftsType;
 
-    const IndexType rows = 8;
-    const IndexType cols = 7;
+   const IndexType rows = 8;
+   const IndexType cols = 7;
+   const DiagonalsShiftsType diagonalsShifts{ -3, -1, 0, 2, 4 };
 
-    Matrix1 m1;
-    m1.reset();
-    m1.setDimensions( rows + 1, cols + 2 );
+   Matrix1 m1;
+   m1.setDimensions( rows + 1, cols + 2, diagonalsShifts );
 
-    Matrix2 m2;
-    m2.reset();
-    m2.setDimensions( rows, cols );
+   Matrix2 m2;
+   m2.setDimensions( rows, cols, diagonalsShifts );
+
+   m1.setLike( m2 );
+
+   EXPECT_EQ( m1.getRows(), m2.getRows() );
+   EXPECT_EQ( m1.getColumns(), m2.getColumns() );
+}
+
+template< typename Matrix >
+void test_GetNonemptyRowsCount()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+
+   /*
+    * Sets up the following 5x8 matrix:
+    *
+    *    /  1  0  0  1  0  1  0  0 \
+    *    |  0  1  0  0  1  0  1  0 |
+    *    |  1  0  1  0  0  1  0  1 |
+    *    |  0  1  0  1  0  0  1  0 |
+    *    \  0  0  1  0  1  0  0  1 /
+    */
+   Matrix m1( 5, 8, DiagonalsShiftsType({ -2, 0, 3, 5 }) );
+   m1.setValue( 1.0 );
+   EXPECT_EQ( m1.getNonemptyRowsCount(), 5 );
+
+   /*
+    * Sets up the following 5x5 matrix:
+    *
+    *    /  1  0  0  1  0  \
+    *    |  0  1  0  0  1  |
+    *    |  1  0  1  0  0  |
+    *    |  0  1  0  1  0  |
+    *    \  0  0  1  0  1  /
+    */
+   Matrix m2( 5, 5, DiagonalsShiftsType({ -2, 0, 3, 5 }) );
+   m2.setValue( 1.0 );
+   EXPECT_EQ( m2.getNonemptyRowsCount(), 5 );
 
-    m1.setLike( m2 );
+   /*
+    * Sets up the following 8x5 matrix:
+    *
+    *    /  1  0  0  1  0  \
+    *    |  0  1  0  0  1  |
+    *    |  1  0  1  0  0  |
+    *    |  0  1  0  1  0  |
+    *    |  0  0  1  0  1  |
+    *    |  0  0  0  1  0  |
+    *    |  0  0  0  0  1  |
+    *    \  0  0  0  0  0  /
+    */
+   Matrix m3( 8, 5, DiagonalsShiftsType({ -2, 0, 3, 5 }) );
+   m3.setValue( 1.0 );
+   EXPECT_EQ( m3.getNonemptyRowsCount(), 7 );
 
-    EXPECT_EQ( m1.getRows(), m2.getRows() );
-    EXPECT_EQ( m1.getColumns(), m2.getColumns() );
 }
 
 template< typename Matrix >
@@ -90,463 +146,470 @@ void test_GetCompressedRowLengths()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
 
    const IndexType rows = 10;
    const IndexType cols = 11;
 
-    Matrix m( rows, cols );
+   Matrix m( rows, cols );
 
-    // Insert values into the rows.
-    RealType value = 1;
+   // Insert values into the rows.
+   RealType value = 1;
 
-    for( IndexType i = 0; i < 3; i++ )      // 0th row
-        m.setElement( 0, i, value++ );
+   for( IndexType i = 0; i < 2; i++ )  // 0th row -> 2 elements
+      m.setElement( 0, i, value++ );
 
-    for( IndexType i = 0; i < 3; i++ )      // 1st row
-        m.setElement( 1, i, value++ );
+   for( IndexType i = 0; i < 3; i++ )  // 1st row -> 3 elements
+      m.setElement( 1, i, value++ );
 
-    for( IndexType i = 0; i < 1; i++ )      // 2nd row
-        m.setElement( 2, i, value++ );
+   for( IndexType i = 1; i < 3; i++ )  // 2nd row -> 2 elements
+      m.setElement( 2, i, value++ );
 
-    for( IndexType i = 0; i < 2; i++ )      // 3rd row
-        m.setElement( 3, i, value++ );
+   for( IndexType i = 2; i < 5; i++ )  // 3rd row -> 3 elements
+      m.setElement( 3, i, value++ );
 
-    for( IndexType i = 0; i < 3; i++ )      // 4th row
-        m.setElement( 4, i, value++ );
+   for( IndexType i = 3; i < 6; i++ )  // 4th row -> 3 elements
+      m.setElement( 4, i, value++ );
 
-    for( IndexType i = 0; i < 4; i++ )      // 5th row
-        m.setElement( 5, i, value++ );
+   for( IndexType i = 4; i < 6; i++ )  // 5th row -> 2 elements
+      m.setElement( 5, i, value++ );
 
-    for( IndexType i = 0; i < 5; i++ )      // 6th row
-        m.setElement( 6, i, value++ );
+   for( IndexType i = 5; i < 8; i++ )  // 6th row -> 3 elements
+      m.setElement( 6, i, value++ );
 
-    for( IndexType i = 0; i < 6; i++ )      // 7th row
-        m.setElement( 7, i, value++ );
+   for( IndexType i = 6; i < 8; i++ )  // 7th row -> 2 elements
+      m.setElement( 7, i, value++ );
 
-    for( IndexType i = 0; i < 7; i++ )      // 8th row
-        m.setElement( 8, i, value++ );
+   for( IndexType i = 7; i < 10; i++ ) // 8th row -> 3 elements
+      m.setElement( 8, i, value++ );
 
-    for( IndexType i = 0; i < 8; i++ )      // 9th row
-        m.setElement( 9, i, value++ );
+   for( IndexType i = 8; i < 11; i++ ) // 9th row -> 3 elements
+      m.setElement( 9, i, value++ );
 
-   typename Matrix::CompressedRowLengthsVector rowLengths;
+   typename Matrix::CompressedRowLengthsVector rowLengths( rows );
    rowLengths = 0;
    m.getCompressedRowLengths( rowLengths );
-   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 };
+   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 2, 3, 2, 3, 3, 2, 3, 2, 3, 3 };
    EXPECT_EQ( rowLengths, correctRowLengths );
 }
 
 template< typename Matrix >
 void test_GetRowLength()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
 
-    const IndexType rows = 8;
-    const IndexType cols = 7;
+   const IndexType rows = 8;
+   const IndexType cols = 7;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   Matrix m( rows, cols );
 
-    EXPECT_EQ( m.getRowLength( 0 ), 7 );
-    EXPECT_EQ( m.getRowLength( 1 ), 7 );
-    EXPECT_EQ( m.getRowLength( 2 ), 7 );
-    EXPECT_EQ( m.getRowLength( 3 ), 7 );
-    EXPECT_EQ( m.getRowLength( 4 ), 7 );
-    EXPECT_EQ( m.getRowLength( 5 ), 7 );
-    EXPECT_EQ( m.getRowLength( 6 ), 7 );
-    EXPECT_EQ( m.getRowLength( 7 ), 7 );
+   EXPECT_EQ( m.getRowLength( 0 ), 2 );
+   EXPECT_EQ( m.getRowLength( 1 ), 3 );
+   EXPECT_EQ( m.getRowLength( 2 ), 3 );
+   EXPECT_EQ( m.getRowLength( 3 ), 3 );
+   EXPECT_EQ( m.getRowLength( 4 ), 3 );
+   EXPECT_EQ( m.getRowLength( 5 ), 3 );
+   EXPECT_EQ( m.getRowLength( 6 ), 2 );
+   EXPECT_EQ( m.getRowLength( 7 ), 1 );
 }
 
 template< typename Matrix >
-void test_GetNumberOfMatrixElements()
+void test_GetAllocatedElementsCount()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    const IndexType rows = 7;
-    const IndexType cols = 6;
+   const IndexType rows = 7;
+   const IndexType cols = 6;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   Matrix m( rows, cols );
 
-    EXPECT_EQ( m.getNumberOfMatrixElements(), 42 );
+   EXPECT_EQ( m.getAllocatedElementsCount(), 21 );
 }
 
 template< typename Matrix >
 void test_GetNumberOfNonzeroMatrixElements()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-/*
- * Sets up the following 7x6 dense matrix:
- *
- *    /  0  2  3  4  5  6 \
- *    |  7  8  9 10 11 12 |
- *    | 13 14 15 16 17 18 |
- *    | 19 20 21 22 23 24 |
- *    | 25 26 27 28 29 30 |
- *    | 31 32 33 34 35 36 |
- *    \ 37 38 39 40 41  0 /
- */
-    const IndexType rows = 7;
-    const IndexType cols = 6;
+   /*
+    * Sets up the following 7x6 matrix:
+    *
+    *    /  0  1  0  0  0  0 \
+    *    |  2  3  4  0  0  0 |
+    *    |  0  5  6  7  0  0 |
+    *    |  0  0  8  9 10  0 |
+    *    |  0  0  0 11 12 13 |
+    *    |  0  0  0  0 14  0 |
+    *    \  0  0  0  0  0 16 /
+    */
+   const IndexType rows = 7;
+   const IndexType cols = 6;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   Matrix m( rows, cols );
 
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++ )
-            m.setElement( i, j, value++ );
+   RealType value = 0;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( cols, i + 2 ); j++ )
+         m.setElement( i, j, value++ );
 
-    m.setElement( 0, 0, 0); // Set the first element of the diagonal to 0.
-    m.setElement( 6, 5, 0); // Set the last element of the diagonal to 0.
+   m.setElement( 5, 5, 0);
 
-    EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 40 );
+   EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 15 );
 }
 
 template< typename Matrix >
 void test_Reset()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-/*
- * Sets up the following 5x4 dense matrix:
- *
- *    /  0  0  0  0 \
- *    |  0  0  0  0 |
- *    |  0  0  0  0 |
- *    |  0  0  0  0 |
- *    \  0  0  0  0 /
- */
-    const IndexType rows = 5;
-    const IndexType cols = 4;
+   /*
+    * Sets up the following 5x4 matrix:
+    *
+    *    /  0  0  0  0 \
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    \  0  0  0  0 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
 
-    Matrix m;
-    m.setDimensions( rows, cols );
+   Matrix m( rows, cols );
 
-    m.reset();
+   m.reset();
 
-    EXPECT_EQ( m.getRows(), 0 );
-    EXPECT_EQ( m.getColumns(), 0 );
+   EXPECT_EQ( m.getRows(), 0 );
+   EXPECT_EQ( m.getColumns(), 0 );
 }
 
 template< typename Matrix >
 void test_SetValue()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 7x6 dense matrix:
- *
- *    /  1  2  3  4  5  6 \
- *    |  7  8  9 10 11 12 |
- *    | 13 14 15 16 17 18 |
- *    | 19 20 21 22 23 24 |
- *    | 25 26 27 28 29 30 |
- *    | 31 32 33 34 35 36 |
- *    \ 37 38 39 40 41 42 /
- */
-    const IndexType rows = 7;
-    const IndexType cols = 6;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   /*
+    * Sets up the following 7x6 matrix:
+    *
+    *    /  0  1  0  0  0  0 \
+    *    |  2  3  4  0  0  0 |
+    *    |  0  5  6  7  0  0 |
+    *    |  0  0  8  9 10  0 |
+    *    |  0  0  0 11 12 13 |
+    *    |  0  0  0  0 14  0 |
+    *    \  0  0  0  0  0 16 /
+    */
+   const IndexType rows = 7;
+   const IndexType cols = 6;
 
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++ )
-            m.setElement( i, j, value++ );
+   Matrix m( rows, cols );
 
-    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
-    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
-    EXPECT_EQ( m.getElement( 0, 2 ),  3 );
-    EXPECT_EQ( m.getElement( 0, 3 ),  4 );
-    EXPECT_EQ( m.getElement( 0, 4 ),  5 );
-    EXPECT_EQ( m.getElement( 0, 5 ),  6 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ),  7 );
-    EXPECT_EQ( m.getElement( 1, 1 ),  8 );
-    EXPECT_EQ( m.getElement( 1, 2 ),  9 );
-    EXPECT_EQ( m.getElement( 1, 3 ), 10 );
-    EXPECT_EQ( m.getElement( 1, 4 ), 11 );
-    EXPECT_EQ( m.getElement( 1, 5 ), 12 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ), 13 );
-    EXPECT_EQ( m.getElement( 2, 1 ), 14 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 15 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 16 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 17 );
-    EXPECT_EQ( m.getElement( 2, 5 ), 18 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ), 19 );
-    EXPECT_EQ( m.getElement( 3, 1 ), 20 );
-    EXPECT_EQ( m.getElement( 3, 2 ), 21 );
-    EXPECT_EQ( m.getElement( 3, 3 ), 22 );
-    EXPECT_EQ( m.getElement( 3, 4 ), 23 );
-    EXPECT_EQ( m.getElement( 3, 5 ), 24 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ), 25 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 26 );
-    EXPECT_EQ( m.getElement( 4, 2 ), 27 );
-    EXPECT_EQ( m.getElement( 4, 3 ), 28 );
-    EXPECT_EQ( m.getElement( 4, 4 ), 29 );
-    EXPECT_EQ( m.getElement( 4, 5 ), 30 );
-
-    EXPECT_EQ( m.getElement( 5, 0 ), 31 );
-    EXPECT_EQ( m.getElement( 5, 1 ), 32 );
-    EXPECT_EQ( m.getElement( 5, 2 ), 33 );
-    EXPECT_EQ( m.getElement( 5, 3 ), 34 );
-    EXPECT_EQ( m.getElement( 5, 4 ), 35 );
-    EXPECT_EQ( m.getElement( 5, 5 ), 36 );
-
-    EXPECT_EQ( m.getElement( 6, 0 ), 37 );
-    EXPECT_EQ( m.getElement( 6, 1 ), 38 );
-    EXPECT_EQ( m.getElement( 6, 2 ), 39 );
-    EXPECT_EQ( m.getElement( 6, 3 ), 40 );
-    EXPECT_EQ( m.getElement( 6, 4 ), 41 );
-    EXPECT_EQ( m.getElement( 6, 5 ), 42 );
-
-    // Set the values of all elements to a certain number
-    m.setValue( 42 );
-
-    EXPECT_EQ( m.getElement( 0, 0 ), 42 );
-    EXPECT_EQ( m.getElement( 0, 1 ), 42 );
-    EXPECT_EQ( m.getElement( 0, 2 ), 42 );
-    EXPECT_EQ( m.getElement( 0, 3 ), 42 );
-    EXPECT_EQ( m.getElement( 0, 4 ), 42 );
-    EXPECT_EQ( m.getElement( 0, 5 ), 42 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ), 42 );
-    EXPECT_EQ( m.getElement( 1, 1 ), 42 );
-    EXPECT_EQ( m.getElement( 1, 2 ), 42 );
-    EXPECT_EQ( m.getElement( 1, 3 ), 42 );
-    EXPECT_EQ( m.getElement( 1, 4 ), 42 );
-    EXPECT_EQ( m.getElement( 1, 5 ), 42 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ), 42 );
-    EXPECT_EQ( m.getElement( 2, 1 ), 42 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 42 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 42 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 42 );
-    EXPECT_EQ( m.getElement( 2, 5 ), 42 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ), 42 );
-    EXPECT_EQ( m.getElement( 3, 1 ), 42 );
-    EXPECT_EQ( m.getElement( 3, 2 ), 42 );
-    EXPECT_EQ( m.getElement( 3, 3 ), 42 );
-    EXPECT_EQ( m.getElement( 3, 4 ), 42 );
-    EXPECT_EQ( m.getElement( 3, 5 ), 42 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ), 42 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 42 );
-    EXPECT_EQ( m.getElement( 4, 2 ), 42 );
-    EXPECT_EQ( m.getElement( 4, 3 ), 42 );
-    EXPECT_EQ( m.getElement( 4, 4 ), 42 );
-    EXPECT_EQ( m.getElement( 4, 5 ), 42 );
-
-    EXPECT_EQ( m.getElement( 5, 0 ), 42 );
-    EXPECT_EQ( m.getElement( 5, 1 ), 42 );
-    EXPECT_EQ( m.getElement( 5, 2 ), 42 );
-    EXPECT_EQ( m.getElement( 5, 3 ), 42 );
-    EXPECT_EQ( m.getElement( 5, 4 ), 42 );
-    EXPECT_EQ( m.getElement( 5, 5 ), 42 );
-
-    EXPECT_EQ( m.getElement( 6, 0 ), 42 );
-    EXPECT_EQ( m.getElement( 6, 1 ), 42 );
-    EXPECT_EQ( m.getElement( 6, 2 ), 42 );
-    EXPECT_EQ( m.getElement( 6, 3 ), 42 );
-    EXPECT_EQ( m.getElement( 6, 4 ), 42 );
-    EXPECT_EQ( m.getElement( 6, 5 ), 42 );
+   RealType value = 0;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( cols, i + 2 ); j++ )
+         m.setElement( i, j, value++ );
+
+   m.setElement( 5, 5, 0);
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  2 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  3 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  5 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  9 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 11 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 12 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 13 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 14 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 16 );
+
+   // Set the values of all elements to a certain number
+   m.setValue( 42 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ), 42 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 42 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 42 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 42 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 42 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 42 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 42 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 42 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 42 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 42 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 42 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 42 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 42 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 42 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 42 );
+   EXPECT_EQ( m.getElement( 5, 5 ), 42 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 42 );
 }
 
 template< typename Matrix >
 void test_SetElement()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 5x5 dense matrix:
- *
- *    /  1  2  3  4  5 \
- *    |  6  7  8  9 10 |
- *    | 11 12 13 14 15 |
- *    | 16 17 18 19 20 |
- *    \ 21 22 23 24 25 /
- */
-    const IndexType rows = 5;
-    const IndexType cols = 5;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   /*
+    * Sets up the following 5x5 matrix:
+    *
+    *    /  1  2  0  0  0 \
+    *    |  6  7  8  0  0 |
+    *    |  0 12 13 14  0 |
+    *    |  0  0 18 19 20 |
+    *    \  0  0  0 24 25 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 5;
 
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++ )
+   Matrix m( rows, cols );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+      {
+         if( abs( i - j ) > 1 )
+         {
+            EXPECT_THROW( m.setElement( i, j, value++ ), std::logic_error );
+         }
+         else
             m.setElement( i, j, value++ );
+      }
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
 
-    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
-    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
-    EXPECT_EQ( m.getElement( 0, 2 ),  3 );
-    EXPECT_EQ( m.getElement( 0, 3 ),  4 );
-    EXPECT_EQ( m.getElement( 0, 4 ),  5 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ),  6 );
-    EXPECT_EQ( m.getElement( 1, 1 ),  7 );
-    EXPECT_EQ( m.getElement( 1, 2 ),  8 );
-    EXPECT_EQ( m.getElement( 1, 3 ),  9 );
-    EXPECT_EQ( m.getElement( 1, 4 ), 10 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ), 11 );
-    EXPECT_EQ( m.getElement( 2, 1 ), 12 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 13 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 14 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 15 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ), 16 );
-    EXPECT_EQ( m.getElement( 3, 1 ), 17 );
-    EXPECT_EQ( m.getElement( 3, 2 ), 18 );
-    EXPECT_EQ( m.getElement( 3, 3 ), 19 );
-    EXPECT_EQ( m.getElement( 3, 4 ), 20 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ), 21 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 22 );
-    EXPECT_EQ( m.getElement( 4, 2 ), 23 );
-    EXPECT_EQ( m.getElement( 4, 3 ), 24 );
-    EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 20 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 25 );
 }
 
 template< typename Matrix >
 void test_AddElement()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 6x5 dense matrix:
- *
- *    /  1  2  3  4  5 \
- *    |  6  7  8  9 10 |
- *    | 11 12 13 14 15 |
- *    | 16 17 18 19 20 |
- *    | 21 22 23 24 25 |
- *    \ 26 27 28 29 30 /
- */
-    const IndexType rows = 6;
-    const IndexType cols = 5;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   /*
+    * Sets up the following 6x5 matrix:
+    *
+    *    /  1  2  0  0  0 \
+    *    |  6  7  8  0  0 |
+    *    |  0 12 13 14  0 |
+    *    |  0  0 18 19 20 |
+    *    |  0  0  0 24 25 |
+    *    \  0  0  0  0 30 /
+    */
+
+   const IndexType rows = 6;
+   const IndexType cols = 5;
+
+   Matrix m( rows, cols );
 
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++ )
-            m.setElement( i, j, value++ );
+        {
+           if( abs( i - j ) <= 1 )
+               m.setElement( i, j, value );
+           value++;
+        }
 
-    // Check the added elements
-    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
-    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
-    EXPECT_EQ( m.getElement( 0, 2 ),  3 );
-    EXPECT_EQ( m.getElement( 0, 3 ),  4 );
-    EXPECT_EQ( m.getElement( 0, 4 ),  5 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ),  6 );
-    EXPECT_EQ( m.getElement( 1, 1 ),  7 );
-    EXPECT_EQ( m.getElement( 1, 2 ),  8 );
-    EXPECT_EQ( m.getElement( 1, 3 ),  9 );
-    EXPECT_EQ( m.getElement( 1, 4 ), 10 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ), 11 );
-    EXPECT_EQ( m.getElement( 2, 1 ), 12 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 13 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 14 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 15 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ), 16 );
-    EXPECT_EQ( m.getElement( 3, 1 ), 17 );
-    EXPECT_EQ( m.getElement( 3, 2 ), 18 );
-    EXPECT_EQ( m.getElement( 3, 3 ), 19 );
-    EXPECT_EQ( m.getElement( 3, 4 ), 20 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ), 21 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 22 );
-    EXPECT_EQ( m.getElement( 4, 2 ), 23 );
-    EXPECT_EQ( m.getElement( 4, 3 ), 24 );
-    EXPECT_EQ( m.getElement( 4, 4 ), 25 );
-
-    EXPECT_EQ( m.getElement( 5, 0 ), 26 );
-    EXPECT_EQ( m.getElement( 5, 1 ), 27 );
-    EXPECT_EQ( m.getElement( 5, 2 ), 28 );
-    EXPECT_EQ( m.getElement( 5, 3 ), 29 );
-    EXPECT_EQ( m.getElement( 5, 4 ), 30 );
-
-    // Add new elements to the old elements with a multiplying factor applied to the old elements.
-/*
- * The following setup results in the following 6x5 dense matrix:
- *
- *    /  3  6  9 12 15 \
- *    | 18 21 24 27 30 |
- *    | 33 36 39 42 45 |
- *    | 48 51 54 57 60 |
- *    | 63 66 69 72 75 |
- *    \ 78 81 84 87 90 /
- */
-    RealType newValue = 1;
-    RealType multiplicator = 2;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++ )
+   // Check the added elements
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 20 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 30 );
+
+   // Add new elements to the old elements with a multiplying factor applied to the old elements.
+   /*
+    * The following setup results in the following 6x5 matrix:
+    *
+    *     /  1  2  0  0  0 \    /  1  2  0  0  0 \   /  3  6  0  0  0 \
+    *     |  6  7  8  0  0 |    |  3  4  5  0  0 |   | 15 18 21  0  0 |
+    * 2 * |  0 12 13 14  0 |  + |  0  6  7  8  0 | = |  0 30 33 36  0 |
+    *     |  0  0 18 19 20 |    |  0  0  9 10 11 |   |  0  0 45 48 51 |
+    *     |  0  0  0 24 25 |    |  0  0  0 12 13 |   |  0  0  0 60 63 |
+    *     \  0  0  0  0 30 /    \  0  0  0  0 14 /   \  0  0  0  0 74 /
+    */
+
+   RealType newValue = 1;
+   RealType multiplicator = 2;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+         if( abs( i - j ) <= 1 )
             m.addElement( i, j, newValue++, multiplicator );
 
-    EXPECT_EQ( m.getElement( 0, 0 ),  3 );
-    EXPECT_EQ( m.getElement( 0, 1 ),  6 );
-    EXPECT_EQ( m.getElement( 0, 2 ),  9 );
-    EXPECT_EQ( m.getElement( 0, 3 ), 12 );
-    EXPECT_EQ( m.getElement( 0, 4 ), 15 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ), 18 );
-    EXPECT_EQ( m.getElement( 1, 1 ), 21 );
-    EXPECT_EQ( m.getElement( 1, 2 ), 24 );
-    EXPECT_EQ( m.getElement( 1, 3 ), 27 );
-    EXPECT_EQ( m.getElement( 1, 4 ), 30 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ), 33 );
-    EXPECT_EQ( m.getElement( 2, 1 ), 36 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 39 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 42 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 45 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ), 48 );
-    EXPECT_EQ( m.getElement( 3, 1 ), 51 );
-    EXPECT_EQ( m.getElement( 3, 2 ), 54 );
-    EXPECT_EQ( m.getElement( 3, 3 ), 57 );
-    EXPECT_EQ( m.getElement( 3, 4 ), 60 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ), 63 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 66 );
-    EXPECT_EQ( m.getElement( 4, 2 ), 69 );
-    EXPECT_EQ( m.getElement( 4, 3 ), 72 );
-    EXPECT_EQ( m.getElement( 4, 4 ), 75 );
-
-    EXPECT_EQ( m.getElement( 5, 0 ), 78 );
-    EXPECT_EQ( m.getElement( 5, 1 ), 81 );
-    EXPECT_EQ( m.getElement( 5, 2 ), 84 );
-    EXPECT_EQ( m.getElement( 5, 3 ), 87 );
-    EXPECT_EQ( m.getElement( 5, 4 ), 90 );
+   EXPECT_EQ( m.getElement( 0, 0 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 15 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 18 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 21 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 30 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 36 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 45  );
+   EXPECT_EQ( m.getElement( 3, 3 ), 48 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 51 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 60 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 63 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 74 );
 }
 
 template< typename Matrix >
@@ -557,63 +620,56 @@ void test_SetRow()
    using IndexType = typename Matrix::IndexType;
 
    /*
-    * Sets up the following 3x7 dense matrix:
+    * Sets up the following 3x7 matrix:
     *
-    *    /  1  2  3  4  5  6  7 \
-    *    |  8  9 10 11 12 13 14 |
-    *    \ 15 16 17 18 19 20 21 /
+    *    /  1  2  0  0  0  0  0 \
+    *    |  8  9 10  0  0  0  0 |
+    *    \  0 16 17 18  0  0  0 /
     */
    const IndexType rows = 3;
    const IndexType cols = 7;
 
-   Matrix m;
-   m.reset();
-   m.setDimensions( rows, cols );
-
-   RealType value = 1;
-   for( IndexType i = 0; i < rows; i++ )
-      for( IndexType j = 0; j < cols; j++ )
-         m.setElement( i, j, value++ );
+   Matrix m( rows, cols );
 
    auto matrix_view = m.getView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      RealType values[ 3 ][ 5 ] {
-         { 11, 11, 11, 11, 11 },
-         { 22, 22, 22, 22, 22 },
-         { 33, 33, 33, 33, 33 } };
-      IndexType columnIndexes[ 3 ][ 5 ] {
-         { 0, 1, 2, 3, 4 },
-         { 0, 1, 2, 3, 4 },
-         { 2, 3, 4, 5, 6 } };
+      RealType values[ 3 ][ 3 ] {
+         {  1,  2,  0 },
+         {  8,  9, 10 },
+         { 16, 17, 18 } };
       auto row = matrix_view.getRow( rowIdx );
-      for( IndexType i = 0; i < 5; i++ )
-        row.setElement( i, values[ rowIdx ][ i ] );
+      for( IndexType i = 0; i < 3; i++ )
+      {
+         if( rowIdx == 0 && i > 1 )
+            break;
+         row.setElement( i, values[ rowIdx ][ i ] );
+      }
    };
    TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f );
 
-   EXPECT_EQ( m.getElement( 0, 0 ), 11 );
-   EXPECT_EQ( m.getElement( 0, 1 ), 11 );
-   EXPECT_EQ( m.getElement( 0, 2 ), 11 );
-   EXPECT_EQ( m.getElement( 0, 3 ), 11 );
-   EXPECT_EQ( m.getElement( 0, 4 ), 11 );
-   EXPECT_EQ( m.getElement( 0, 5 ),  6 );
-   EXPECT_EQ( m.getElement( 0, 6 ),  7 );
-
-   EXPECT_EQ( m.getElement( 1, 0 ), 22 );
-   EXPECT_EQ( m.getElement( 1, 1 ), 22 );
-   EXPECT_EQ( m.getElement( 1, 2 ), 22 );
-   EXPECT_EQ( m.getElement( 1, 3 ), 22 );
-   EXPECT_EQ( m.getElement( 1, 4 ), 22 );
-   EXPECT_EQ( m.getElement( 1, 5 ), 13 );
-   EXPECT_EQ( m.getElement( 1, 6 ), 14 );
-
-   EXPECT_EQ( m.getElement( 2, 0 ), 15 );
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 6 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  8 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  9 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 6 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
    EXPECT_EQ( m.getElement( 2, 1 ), 16 );
-   EXPECT_EQ( m.getElement( 2, 2 ), 33 );
-   EXPECT_EQ( m.getElement( 2, 3 ), 33 );
-   EXPECT_EQ( m.getElement( 2, 4 ), 33 );
-   EXPECT_EQ( m.getElement( 2, 5 ), 33 );
-   EXPECT_EQ( m.getElement( 2, 6 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 17 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 18 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 6 ),  0 );
 }
 
 template< typename Matrix >
@@ -623,14 +679,14 @@ void test_AddRow()
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
    /*
-    * Sets up the following 6x5 dense matrix:
+    * Sets up the following 6x5 matrix:
     *
-    *    /  1  2  3  4  5 \
-    *    |  6  7  8  9 10 |
-    *    | 11 12 13 14 15 |
-    *    | 16 17 18 19 20 |
-    *    | 21 22 23 24 25 |
-    *    \ 26 27 28 29 30 /
+    *    /  1  2  0  0  0 \
+    *    |  6  7  8  0  0 |
+    *    |  0 12 13 14  0 |
+    *    |  0  0 18 19 20 |
+    *    |  0  0  0 24 25 |
+    *    \  0  0  0  0 30 /
     */
 
    const IndexType rows = 6;
@@ -641,68 +697,72 @@ void test_AddRow()
    RealType value = 1;
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < cols; j++ )
-         m.setElement( i, j, value++ );
+      {
+         if( abs( i - j ) <= 1 )
+            m.setElement( i, j, value );
+         value++;
+      }
 
    // Check the added elements
    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
-   EXPECT_EQ( m.getElement( 0, 2 ),  3 );
-   EXPECT_EQ( m.getElement( 0, 3 ),  4 );
-   EXPECT_EQ( m.getElement( 0, 4 ),  5 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
 
    EXPECT_EQ( m.getElement( 1, 0 ),  6 );
    EXPECT_EQ( m.getElement( 1, 1 ),  7 );
    EXPECT_EQ( m.getElement( 1, 2 ),  8 );
-   EXPECT_EQ( m.getElement( 1, 3 ),  9 );
-   EXPECT_EQ( m.getElement( 1, 4 ), 10 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
 
-   EXPECT_EQ( m.getElement( 2, 0 ), 11 );
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
    EXPECT_EQ( m.getElement( 2, 1 ), 12 );
    EXPECT_EQ( m.getElement( 2, 2 ), 13 );
    EXPECT_EQ( m.getElement( 2, 3 ), 14 );
-   EXPECT_EQ( m.getElement( 2, 4 ), 15 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
 
-   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
-   EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
    EXPECT_EQ( m.getElement( 3, 2 ), 18 );
    EXPECT_EQ( m.getElement( 3, 3 ), 19 );
    EXPECT_EQ( m.getElement( 3, 4 ), 20 );
 
-   EXPECT_EQ( m.getElement( 4, 0 ), 21 );
-   EXPECT_EQ( m.getElement( 4, 1 ), 22 );
-   EXPECT_EQ( m.getElement( 4, 2 ), 23 );
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
    EXPECT_EQ( m.getElement( 4, 3 ), 24 );
    EXPECT_EQ( m.getElement( 4, 4 ), 25 );
 
-   EXPECT_EQ( m.getElement( 5, 0 ), 26 );
-   EXPECT_EQ( m.getElement( 5, 1 ), 27 );
-   EXPECT_EQ( m.getElement( 5, 2 ), 28 );
-   EXPECT_EQ( m.getElement( 5, 3 ), 29 );
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
    EXPECT_EQ( m.getElement( 5, 4 ), 30 );
 
    // Add new elements to the old elements with a multiplying factor applied to the old elements.
    /*
     * The following setup results in the following 6x5 sparse matrix:
     *
-    *    /  3  6  9 12 15 \
-    *    | 18 21 24 27 30 |
-    *    | 33 36 39 42 45 |
-    *    | 48 51 54 57 60 |
-    *    | 63 66 69 72 75 |
-    *    \ 78 81 84 87 90 /
+    *  / 0  0  0  0  0  0 \   /  1  2  0  0  0 \   / 11 11  0  0  0 \   / 11  11  0   0   0 \
+    *  | 0  1  0  0  0  0 |   |  6  7  8  0  0 |   | 22 22 22  0  0 |   | 28  29 30   0   0 |
+    *  | 0  0  2  0  0  0 | * |  0 12 13 14  0 | + |  0 33 33 33  0 | = |  0  57 59  61   0 |
+    *  | 0  0  0  3  0  0 |   |  0  0 18 19 20 |   |  0  0 44 44 44 |   |  0   0 98 101 104 |
+    *  | 0  0  0  0  4  0 |   |  0  0  0 24 25 |   |  0  0  0 55 55 |   |  0   0  0 151 155 |
+    *  \ 0  0  0  0  0  5 /   \  0  0  0  0 30 /   \  0  0  0  0 66 /   \  0   0  0   0 216 /
     */
 
    auto matrix_view = m.getView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      RealType values[ 6 ][ 5 ] {
-         { 11, 11, 11, 11, 0 },
-         { 22, 22, 22, 22, 0 },
-         { 33, 33, 33, 33, 0 },
-         { 44, 44, 44, 44, 0 },
-         { 55, 55, 55, 55, 0 },
-         { 66, 66, 66, 66, 0 } };
+      RealType values[ 6 ][ 3 ] {
+         { 11, 11,  0 },
+         { 22, 22, 22 },
+         { 33, 33, 33 },
+         { 44, 44, 44 },
+         { 55, 55, 55 },
+         { 66, 66, 66 } };
       auto row = matrix_view.getRow( rowIdx );
-      for( IndexType i = 0; i < 5; i++ )
+      for( IndexType i = 0; i < 3; i++ )
       {
          RealType& val = row.getValue( i );
          val = rowIdx * val + values[ rowIdx ][ i ];
@@ -711,208 +771,207 @@ void test_AddRow()
    TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f );
 
 
-    EXPECT_EQ( m.getElement( 0, 0 ),  11 );
-    EXPECT_EQ( m.getElement( 0, 1 ),  11 );
-    EXPECT_EQ( m.getElement( 0, 2 ),  11 );
-    EXPECT_EQ( m.getElement( 0, 3 ),  11 );
-    EXPECT_EQ( m.getElement( 0, 4 ),   0 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ),  28 );
-    EXPECT_EQ( m.getElement( 1, 1 ),  29 );
-    EXPECT_EQ( m.getElement( 1, 2 ),  30 );
-    EXPECT_EQ( m.getElement( 1, 3 ),  31 );
-    EXPECT_EQ( m.getElement( 1, 4 ),  10 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ),  55 );
-    EXPECT_EQ( m.getElement( 2, 1 ),  57 );
-    EXPECT_EQ( m.getElement( 2, 2 ),  59 );
-    EXPECT_EQ( m.getElement( 2, 3 ),  61 );
-    EXPECT_EQ( m.getElement( 2, 4 ),  30 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ),  92 );
-    EXPECT_EQ( m.getElement( 3, 1 ),  95 );
-    EXPECT_EQ( m.getElement( 3, 2 ),  98 );
-    EXPECT_EQ( m.getElement( 3, 3 ), 101 );
-    EXPECT_EQ( m.getElement( 3, 4 ),  60 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ), 139 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 143 );
-    EXPECT_EQ( m.getElement( 4, 2 ), 147 );
-    EXPECT_EQ( m.getElement( 4, 3 ), 151 );
-    EXPECT_EQ( m.getElement( 4, 4 ), 100 );
-
-    EXPECT_EQ( m.getElement( 5, 0 ), 196 );
-    EXPECT_EQ( m.getElement( 5, 1 ), 201 );
-    EXPECT_EQ( m.getElement( 5, 2 ), 206 );
-    EXPECT_EQ( m.getElement( 5, 3 ), 211 );
-    EXPECT_EQ( m.getElement( 5, 4 ), 150 );
+   EXPECT_EQ( m.getElement( 0, 0 ),  11 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  11 );
+   EXPECT_EQ( m.getElement( 0, 2 ),   0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),   0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),   0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  28 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  29 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  30 );
+   EXPECT_EQ( m.getElement( 1, 3 ),   0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),   0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),   0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  57 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  59 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  61 );
+   EXPECT_EQ( m.getElement( 2, 4 ),   0  );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),   0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),   0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  98 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 101 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 104 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),   0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),   0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),   0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 151 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 155 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),   0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),   0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),   0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),   0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 216 );
 }
 
 template< typename Matrix >
 void test_VectorProduct()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 5x4 dense matrix:
- *
- *    /  1  2  3  4 \
- *    |  5  6  7  8 |
- *    |  9 10 11 12 |
- *    | 13 14 15 16 |
- *    \ 17 18 19 20 /
- */
-    const IndexType rows = 5;
-    const IndexType cols = 4;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   /*
+    * Sets up the following 5x4 matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  5  6  7  0 |
+    *    |  0 10 11 12 |
+    *    |  0  0 15 16 |
+    *    \  0  0  0 20 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
 
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++)
-            m.setElement( i, j, value++ );
+   Matrix m( rows, cols );
 
-    using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++)
+      {
+         if( abs( i - j ) <= 1 )
+            m.setElement( i, j, value );
+         value++;
+      }
 
-    VectorType inVector;
-    inVector.setSize( 4 );
-    for( IndexType i = 0; i < inVector.getSize(); i++ )
-        inVector.setElement( i, 2 );
+   using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
 
-    VectorType outVector;
-    outVector.setSize( 5 );
-    for( IndexType j = 0; j < outVector.getSize(); j++ )
-        outVector.setElement( j, 0 );
+   VectorType inVector( 4 );
+   inVector = 2;
 
+   VectorType outVector( 5 );
+   outVector = 0;
 
-    m.vectorProduct( inVector, outVector);
+   m.vectorProduct( inVector, outVector);
 
-    EXPECT_EQ( outVector.getElement( 0 ),  20 );
-    EXPECT_EQ( outVector.getElement( 1 ),  52 );
-    EXPECT_EQ( outVector.getElement( 2 ),  84 );
-    EXPECT_EQ( outVector.getElement( 3 ), 116 );
-    EXPECT_EQ( outVector.getElement( 4 ), 148 );
+   EXPECT_EQ( outVector.getElement( 0 ),  6 );
+   EXPECT_EQ( outVector.getElement( 1 ), 36 );
+   EXPECT_EQ( outVector.getElement( 2 ), 66 );
+   EXPECT_EQ( outVector.getElement( 3 ), 62 );
+   EXPECT_EQ( outVector.getElement( 4 ), 40 );
 }
 
-template< typename Matrix >
+template< typename Matrix1, typename Matrix2 = Matrix1 >
 void test_AddMatrix()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 5x4 dense matrix:
- *
- *    /  1  2  3  4 \
- *    |  5  6  7  8 |
- *    |  9 10 11 12 |
- *    | 13 14 15 16 |
- *    \ 17 18 19 20 /
- */
-    const IndexType rows = 5;
-    const IndexType cols = 4;
+   using RealType = typename Matrix1::RealType;
+   using DeviceType = typename Matrix1::DeviceType;
+   using IndexType = typename Matrix1::IndexType;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   /*
+    * Sets up the following 5x4 matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  5  6  7  0 |
+    *    |  0 10 11 12 |
+    *    |  0  0 15 16 |
+    *    \  0  0  0 20 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
 
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++)
-            m.setElement( i, j, value++ );
+   Matrix1 m( rows, cols );
 
-/*
- * Sets up the following 5x4 dense matrix:
- *
- *    /  1  2  3  4 \
- *    |  5  6  7  8 |
- *    |  9 10 11 12 |
- *    | 13 14 15 16 |
- *    \ 17 18 19 20 /
- */
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++)
+      {
+         if( abs( i - j ) <= 1 )
+            m.setElement( i, j, value );
+         value++;
+      }
 
-    Matrix m2;
-    m2.reset();
-    m2.setDimensions( rows, cols );
+   /*
+    * Sets up the following 5x4 matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  3  4  5  0 |
+    *    |  0  6  7  8 |
+    *    |  0  0  9 10 |
+    *    \  0  0  0 11 /
+    */
+   Matrix2 m2( rows, cols );
 
-    RealType newValue = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++)
+   RealType newValue = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++)
+         if( abs( i - j ) <= 1 )
             m2.setElement( i, j, newValue++ );
 
-    /*
- * Sets up the following 5x4 dense matrix:
- *
- *    /  1  2  3  4 \
- *    |  5  6  7  8 |
- *    |  9 10 11 12 |
- *    | 13 14 15 16 |
- *    \ 17 18 19 20 /
- */
+   /*
+    * Compute the following 5x4 matrix:
+    *
+    *  /  1  2  0  0 \       /  1  2  0  0 \    /  3  6  0  0 \
+    *  |  5  6  7  0 |       |  3  4  5  0 |    | 11 14 17  0 |
+    *  |  0 10 11 12 | + 2 * |  0  6  7  8 | =  |  0 22 25 28 |
+    *  |  0  0 15 16 |       |  0  0  9 10 |    |  0  0 33 36 |
+    *  \  0  0  0 20 /       \  0  0  0 11 /    \  0  0  0 42 /
+    */
 
-    Matrix mResult;
-    mResult.reset();
-    mResult.setDimensions( rows, cols );
-
-    mResult = m;
-
-    RealType matrixMultiplicator = 2;
-    RealType thisMatrixMultiplicator = 1;
-
-    mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator );
-
-    EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) );
-    EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) );
-    EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) );
-    EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) );
-
-    EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) );
-    EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) );
-    EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) );
-    EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) );
-
-    EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) );
-    EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) );
-    EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) );
-    EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) );
-
-    EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) );
-    EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) );
-    EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) );
-    EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) );
-
-    EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) );
-    EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) );
-    EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) );
-    EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) );
-
-    EXPECT_EQ( mResult.getElement( 0, 0 ),  3 );
-    EXPECT_EQ( mResult.getElement( 0, 1 ),  6 );
-    EXPECT_EQ( mResult.getElement( 0, 2 ),  9 );
-    EXPECT_EQ( mResult.getElement( 0, 3 ), 12 );
-
-    EXPECT_EQ( mResult.getElement( 1, 0 ), 15 );
-    EXPECT_EQ( mResult.getElement( 1, 1 ), 18 );
-    EXPECT_EQ( mResult.getElement( 1, 2 ), 21 );
-    EXPECT_EQ( mResult.getElement( 1, 3 ), 24 );
-
-    EXPECT_EQ( mResult.getElement( 2, 0 ), 27 );
-    EXPECT_EQ( mResult.getElement( 2, 1 ), 30 );
-    EXPECT_EQ( mResult.getElement( 2, 2 ), 33 );
-    EXPECT_EQ( mResult.getElement( 2, 3 ), 36 );
-
-    EXPECT_EQ( mResult.getElement( 3, 0 ), 39 );
-    EXPECT_EQ( mResult.getElement( 3, 1 ), 42 );
-    EXPECT_EQ( mResult.getElement( 3, 2 ), 45 );
-    EXPECT_EQ( mResult.getElement( 3, 3 ), 48 );
-
-    EXPECT_EQ( mResult.getElement( 4, 0 ), 51 );
-    EXPECT_EQ( mResult.getElement( 4, 1 ), 54 );
-    EXPECT_EQ( mResult.getElement( 4, 2 ), 57 );
-    EXPECT_EQ( mResult.getElement( 4, 3 ), 60 );
+   Matrix1 mResult;
+   mResult.reset();
+   mResult.setDimensions( rows, cols );
+
+   mResult = m;
+
+   RealType matrixMultiplicator = 2;
+   RealType thisMatrixMultiplicator = 1;
+
+   mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator );
+
+   EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) );
+   EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) );
+   EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) );
+   EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) );
+   EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) );
+   EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) );
+   EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) );
+   EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) );
+   EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) );
+   EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) );
+   EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) );
+   EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) );
+   EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) );
+   EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) );
+   EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) );
+   EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 0, 0 ),  3 );
+   EXPECT_EQ( mResult.getElement( 0, 1 ),  6 );
+   EXPECT_EQ( mResult.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( mResult.getElement( 0, 3 ),  0 );
+
+   EXPECT_EQ( mResult.getElement( 1, 0 ), 11 );
+   EXPECT_EQ( mResult.getElement( 1, 1 ), 14 );
+   EXPECT_EQ( mResult.getElement( 1, 2 ), 17 );
+   EXPECT_EQ( mResult.getElement( 1, 3 ),  0 );
+
+   EXPECT_EQ( mResult.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( mResult.getElement( 2, 1 ), 22 );
+   EXPECT_EQ( mResult.getElement( 2, 2 ), 25 );
+   EXPECT_EQ( mResult.getElement( 2, 3 ), 28 );
+
+   EXPECT_EQ( mResult.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( mResult.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( mResult.getElement( 3, 2 ), 33 );
+   EXPECT_EQ( mResult.getElement( 3, 3 ), 36 );
+
+   EXPECT_EQ( mResult.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( mResult.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( mResult.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( mResult.getElement( 4, 3 ), 42 );
 }
 
 template< typename Matrix >
@@ -922,7 +981,7 @@ void test_GetMatrixProduct()
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
 /*
- * Sets up the following 5x4 dense matrix:
+ * Sets up the following 5x4 matrix:
  *
  *    /  1  2  3  4 \
  *    |  5  6  7  8 |
@@ -943,7 +1002,7 @@ void test_GetMatrixProduct()
             leftMatrix.setElement( i, j, value++ );
 
 /*
- * Sets up the following 4x5 dense matrix:
+ * Sets up the following 4x5 matrix:
  *
  *    /  1  2  3  4  5 \
  *    |  6  7  8  9 10 |
@@ -963,7 +1022,7 @@ void test_GetMatrixProduct()
             rightMatrix.setElement( i, j, newValue++ );
 
 /*
- * Sets up the following 5x5 resulting dense matrix:
+ * Sets up the following 5x5 resulting matrix:
  *
  *    /  0  0  0  0 \
  *    |  0  0  0  0 |
@@ -1027,7 +1086,7 @@ void test_GetTransposition()
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
 /*
- * Sets up the following 3x2 dense matrix:
+ * Sets up the following 3x2 matrix:
  *
  *    /  1  2 \
  *    |  3  4 |
@@ -1048,7 +1107,7 @@ void test_GetTransposition()
     m.print( std::cout );
 
 /*
- * Sets up the following 2x3 dense matrix:
+ * Sets up the following 2x3 matrix:
  *
  *    /  0  0  0 \
  *    \  0  0  0 /
@@ -1066,7 +1125,7 @@ void test_GetTransposition()
     mTransposed.print( std::cout );
 
 /*
- * Should result in the following 2x3 dense matrix:
+ * Should result in the following 2x3 matrix:
  *
  *    /  1  3  5 \
  *    \  2  4  6 /
@@ -1089,7 +1148,7 @@ void test_PerformSORIteration()
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
 /*
- * Sets up the following 4x4 dense matrix:
+ * Sets up the following 4x4 matrix:
  *
  *    /  4  1  1  1 \
  *    |  1  4  1  1 |
@@ -1164,43 +1223,44 @@ void test_AssignmentOperator()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
+   constexpr bool rowMajorOrder = Matrix::getRowMajorOrder();
 
-   using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >;
-   using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+   using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType, rowMajorOrder >;
+   using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType, !rowMajorOrder >;
 
    const IndexType rows( 10 ), columns( 10 );
    MultidiagonalHost hostMatrix( rows, columns );
-   for( IndexType i = 0; i < columns; i++ )
-      for( IndexType j = 0; j <= i; j++ )
-         hostMatrix.setElement( i, j,  i + j );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j <  columns; j++ )
+         if( abs( i - j ) <= 1 )
+            hostMatrix.setElement( i, j,  i + j );
 
    Matrix matrix( rows, columns );
    matrix.getValues() = 0.0;
    matrix = hostMatrix;
    for( IndexType i = 0; i < columns; i++ )
       for( IndexType j = 0; j < rows; j++ )
-      {
-         if( j > i )
-            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
-         else
-            EXPECT_EQ( matrix.getElement( i, j ), i + j );
-      }
+            if( abs( i - j ) <= 1 )
+               EXPECT_EQ( matrix.getElement( i, j ), i + j );
+            else
+               EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
 
 #ifdef HAVE_CUDA
    MultidiagonalCuda cudaMatrix( rows, columns );
-   for( IndexType i = 0; i < columns; i++ )
-      for( IndexType j = 0; j <= i; j++ )
-         cudaMatrix.setElement( i, j, i + j );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+         if( abs( i - j ) <= 1 )
+            cudaMatrix.setElement( i, j, i + j );
 
    matrix.getValues() = 0.0;
    matrix = cudaMatrix;
-   for( IndexType i = 0; i < columns; i++ )
-      for( IndexType j = 0; j < rows; j++ )
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
       {
-         if( j > i )
-            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
-         else
+         if( abs( i - j ) <= 1 )
             EXPECT_EQ( matrix.getElement( i, j ), i + j );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
       }
 #endif
 }
@@ -1209,123 +1269,125 @@ void test_AssignmentOperator()
 template< typename Matrix >
 void test_SaveAndLoad()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 4x4 dense matrix:
- *
- *    /  1  2  3  4 \
- *    |  5  6  7  8 |
- *    |  9 10 11 12 |
- *    \ 13 14 15 16 /
- */
-    const IndexType rows = 4;
-    const IndexType cols = 4;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 4x4 matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  5  6  7  0 |
+    *    |  0 10 11 12 |
+    *    \  0  0 15 16 /
+    */
+   const IndexType rows = 4;
+   const IndexType cols = 4;
 
-    Matrix savedMatrix;
-    savedMatrix.reset();
-    savedMatrix.setDimensions( rows, cols );
+   Matrix savedMatrix( rows, cols );
 
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++ )
-            savedMatrix.setElement( i, j, value++ );
-
-    ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) );
-
-    Matrix loadedMatrix;
-    loadedMatrix.reset();
-    loadedMatrix.setDimensions( rows, cols );
-
-    ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) );
-
-    EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
-
-    EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
-
-    EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
-
-    EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
-
-    EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
-    EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
-    EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
-    EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  4 );
-
-    EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  5 );
-    EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  6 );
-    EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  7 );
-    EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  8 );
-
-    EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  9 );
-    EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 );
-    EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 );
-    EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 );
-
-    EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 13 );
-    EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 14 );
-    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 );
-    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 );
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+      {
+         if( abs( i - j ) <= 1 )
+            savedMatrix.setElement( i, j, value );
+         value++;
+      }
+
+   ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) );
+
+   Matrix loadedMatrix;
+
+   ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  5 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  6 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  7 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 );
 }
 
 template< typename Matrix >
 void test_Print()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 5x4 sparse matrix:
- *
- *    /  1  2  3  4 \
- *    |  5  6  7  8 |
- *    |  9 10 11 12 |
- *    | 13 14 15 16 |
- *    \ 17 18 19 20 /
- */
-    const IndexType rows = 5;
-    const IndexType cols = 4;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   /*
+    * Sets up the following 5x4 sparse matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  5  6  7  0 |
+    *    |  0 10 11 12 |
+    *    |  0  0 15 16 |
+    *    \  0  0  0 20 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
 
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++)
-        for( IndexType j = 0; j < cols; j++)
-            m.setElement( i, j, value++ );
+   Matrix m( rows, cols );
 
-    #include <sstream>
-    std::stringstream printed;
-    std::stringstream couted;
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++)
+      for( IndexType j = 0; j < cols; j++)
+      {
+         if( abs( i - j ) <= 1 )
+            m.setElement( i, j, value );
+         value++;
+      }
 
-    //change the underlying buffer and save the old buffer
-    auto old_buf = std::cout.rdbuf(printed.rdbuf());
+   std::stringstream printed;
+   std::stringstream couted;
 
-    m.print( std::cout ); //all the std::cout goes to ss
+   //change the underlying buffer and save the old buffer
+   auto old_buf = std::cout.rdbuf(printed.rdbuf());
 
-    std::cout.rdbuf(old_buf); //reset
+   m.print( std::cout ); //all the std::cout goes to ss
 
-    couted << "Row: 0 ->  Col:0->1	 Col:1->2	 Col:2->3	 Col:3->4\t\n"
-              "Row: 1 ->  Col:0->5	 Col:1->6	 Col:2->7	 Col:3->8\t\n"
-              "Row: 2 ->  Col:0->9	 Col:1->10	 Col:2->11	 Col:3->12\t\n"
-              "Row: 3 ->  Col:0->13	 Col:1->14	 Col:2->15	 Col:3->16\t\n"
-              "Row: 4 ->  Col:0->17	 Col:1->18	 Col:2->19	 Col:3->20\t\n";
+   std::cout.rdbuf(old_buf); //reset
+   couted << "Row: 0 ->  Col:0->1\t Col:1->2\t\n"
+             "Row: 1 ->  Col:0->5\t Col:1->6\t Col:2->7\t\n"
+             "Row: 2 ->  Col:1->10\t Col:2->11\t Col:3->12\t\n"
+             "Row: 3 ->  Col:2->15\t Col:3->16\t\n"
+             "Row: 4 ->  Col:3->20\t\n";
 
-    EXPECT_EQ( printed.str(), couted.str() );
+   EXPECT_EQ( printed.str(), couted.str() );
 }
 
 // test fixture for typed tests
@@ -1388,6 +1450,21 @@ TYPED_TEST( MatrixTest, setLikeTest )
     test_SetLike< MatrixType, MatrixType >();
 }
 
+TYPED_TEST( MatrixTest, getNonemptyRowsCountTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetNonemptyRowsCount< MatrixType >();
+}
+
+/*
+TYPED_TEST( MatrixTest, getCompressedRowLengthTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetCompressedRowLengths< MatrixType >();
+}
+
 TYPED_TEST( MatrixTest, getRowLengthTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
@@ -1395,11 +1472,11 @@ TYPED_TEST( MatrixTest, getRowLengthTest )
     test_GetRowLength< MatrixType >();
 }
 
-TYPED_TEST( MatrixTest, getNumberOfMatrixElementsTest )
+TYPED_TEST( MatrixTest, getAllocatedElementsCountTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
 
-    test_GetNumberOfMatrixElements< MatrixType >();
+    test_GetAllocatedElementsCount< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest )
@@ -1465,6 +1542,19 @@ TYPED_TEST( MatrixTest, addMatrixTest )
     test_AddMatrix< MatrixType >();
 }
 
+TYPED_TEST( MatrixTest, addMatrixTest_differentOrdering )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    using RealType = typename MatrixType::RealType;
+    using DeviceType = typename MatrixType::DeviceType;
+    using IndexType = typename MatrixType::IndexType;
+    using RealAllocatorType = typename MatrixType::RealAllocatorType;
+    using MatrixType2 = TNL::Matrices::Multidiagonal< RealType, DeviceType, IndexType, ! MatrixType::getRowMajorOrder(), RealAllocatorType >;
+
+    test_AddMatrix< MatrixType, MatrixType2 >();
+}
+
 TYPED_TEST( MatrixTest, assignmentOperatorTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
@@ -1485,6 +1575,7 @@ TYPED_TEST( MatrixTest, printTest )
 
     test_Print< MatrixType >();
 }
+*/
 
 //// test_getType is not general enough yet. DO NOT TEST IT YET.
 
-- 
GitLab


From b20418121b70638c87ce82851777105c29e9e464 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 13 Jan 2020 21:39:52 +0100
Subject: [PATCH 091/179] Debugging multidiagonal matrix.

---
 src/TNL/Matrices/Matrix.h                     |   4 +-
 src/TNL/Matrices/MatrixView.h                 |   4 +-
 src/TNL/Matrices/Multidiagonal.h              |  10 +-
 src/TNL/Matrices/Multidiagonal.hpp            |  19 +-
 src/TNL/Matrices/MultidiagonalMatrixRowView.h |  11 +-
 .../Matrices/MultidiagonalMatrixRowView.hpp   |  35 +-
 src/TNL/Matrices/MultidiagonalMatrixView.h    |   7 +-
 src/TNL/Matrices/MultidiagonalMatrixView.hpp  | 172 +++--
 src/TNL/Matrices/Tridiagonal.h                |   4 +-
 src/TNL/Matrices/Tridiagonal.hpp              |  17 +-
 src/TNL/Matrices/TridiagonalMatrixView.h      |   4 +-
 src/TNL/Matrices/TridiagonalMatrixView.hpp    |  10 +-
 .../details/MultidiagonalMatrixIndexer.h      |   3 +
 .../Matrices/MultidiagonalMatrixTest.h        | 666 ++++++++----------
 14 files changed, 453 insertions(+), 513 deletions(-)

diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index 7813fa962..ebe7ccc21 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -76,11 +76,11 @@ public:
    __cuda_callable__
    IndexType getColumns() const;
 
-   virtual bool setElement( const IndexType row,
+   virtual void setElement( const IndexType row,
                             const IndexType column,
                             const RealType& value ) = 0;
 
-   virtual bool addElement( const IndexType row,
+   virtual void addElement( const IndexType row,
                             const IndexType column,
                             const RealType& value,
                             const RealType& thisElementMultiplicator = 1.0 ) = 0;
diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h
index 467d02349..2a6429df5 100644
--- a/src/TNL/Matrices/MatrixView.h
+++ b/src/TNL/Matrices/MatrixView.h
@@ -73,11 +73,11 @@ public:
     * in the future and it does not slow down, declare them as virtual here.
     */
 
-   virtual bool setElement( const IndexType row,
+   virtual void setElement( const IndexType row,
                             const IndexType column,
                             const RealType& value ) = 0;
 
-   virtual bool addElement( const IndexType row,
+   virtual void addElement( const IndexType row,
                             const IndexType column,
                             const RealType& value,
                             const RealType& thisElementMultiplicator = 1.0 ) = 0;
diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h
index 5d23cd960..1741c0c75 100644
--- a/src/TNL/Matrices/Multidiagonal.h
+++ b/src/TNL/Matrices/Multidiagonal.h
@@ -38,12 +38,12 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator >
       using ValuesType = typename BaseType::ValuesVector;
       using ValuesViewType = typename ValuesType::ViewType;
       using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, RowMajorOrder >;
-      using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType >;
+      using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
+      using DiagonalsShiftsView = typename DiagonalsShiftsType::ViewType;
+      using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsShiftsView >;
       using ViewType = MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >;
       using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
 
-      using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
-      using DiagonalsShiftsView = typename DiagonalsShiftsType::ViewType;
       using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >;
       using HostDiagonalsShiftsView = typename HostDiagonalsShiftsType::ViewType;
 
@@ -119,11 +119,11 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator >
 
       void setValue( const RealType& v );
 
-      bool setElement( const IndexType row,
+      void setElement( const IndexType row,
                        const IndexType column,
                        const RealType& value );
 
-      bool addElement( const IndexType row,
+      void addElement( const IndexType row,
                        const IndexType column,
                        const RealType& value,
                        const RealType& thisElementMultiplicator = 1.0 );
diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp
index 95f6667c1..53e3c7f2f 100644
--- a/src/TNL/Matrices/Multidiagonal.hpp
+++ b/src/TNL/Matrices/Multidiagonal.hpp
@@ -44,6 +44,7 @@ Multidiagonal( const IndexType rows,
                const IndexType columns,
                const Vector& diagonalsShifts )
 {
+   TNL_ASSERT_GT( diagonalsShifts.getSize(), 0, "Cannot construct mutltidiagonal matrix with no diagonals shifts." );
    this->setDimensions( rows, columns, diagonalsShifts );
 }
 
@@ -60,6 +61,7 @@ getView() const -> ViewType
    // TODO: fix when getConstView works
    return ViewType( const_cast< Multidiagonal* >( this )->values.getView(),
                     const_cast< Multidiagonal* >( this )->diagonalsShifts.getView(),
+                    const_cast< Multidiagonal* >( this )->hostDiagonalsShifts.getView(),
                     indexer );
 }
 
@@ -358,11 +360,11 @@ template< typename Real,
           bool RowMajorOrder,
           typename RealAllocator,
           typename IndexAllocator >
-bool
+void
 Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
 setElement( const IndexType row, const IndexType column, const RealType& value )
 {
-   return this->view.setElement( row, column, value );
+   this->view.setElement( row, column, value );
 }
 
 template< typename Real,
@@ -371,14 +373,14 @@ template< typename Real,
           bool RowMajorOrder,
           typename RealAllocator,
           typename IndexAllocator >
-bool
+void
 Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
 addElement( const IndexType row,
             const IndexType column,
             const RealType& value,
             const RealType& thisElementMultiplicator )
 {
-   return this->view.addElement( row, column, value, thisElementMultiplicator );
+   this->view.addElement( row, column, value, thisElementMultiplicator );
 }
 
 template< typename Real,
@@ -745,14 +747,7 @@ void
 Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
 print( std::ostream& str ) const
 {
-   for( IndexType row = 0; row < this->getRows(); row++ )
-   {
-      str <<"Row: " << row << " -> ";
-      for( IndexType column = row - 1; column < row + 2; column++ )
-         if( column >= 0 && column < this->columns )
-            str << " Col:" << column << "->" << this->getElement( row, column ) << "\t";
-      str << std::endl;
-   }
+   this->view.print( str );
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.h b/src/TNL/Matrices/MultidiagonalMatrixRowView.h
index 68b5be55c..0825d6fb3 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixRowView.h
+++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.h
@@ -14,7 +14,8 @@ namespace TNL {
 namespace Matrices {   
 
 template< typename ValuesView,
-          typename Indexer >
+          typename Indexer,
+          typename DiagonalsShiftsView_ >
 class MultidiagonalMatrixRowView
 {
    public:
@@ -23,11 +24,13 @@ class MultidiagonalMatrixRowView
       using IndexType = typename ValuesView::IndexType;
       using ValuesViewType = ValuesView;
       using IndexerType = Indexer;
+      using DiagonalsShiftsView = DiagonalsShiftsView_;
 
       __cuda_callable__
       MultidiagonalMatrixRowView( const IndexType rowIdx,
-                                const ValuesViewType& values,
-                                const IndexerType& indexer );
+                                  const DiagonalsShiftsView& diagonalsShifts,
+                                  const ValuesViewType& values,
+                                  const IndexerType& indexer);
 
       __cuda_callable__
       IndexType getSize() const;
@@ -48,6 +51,8 @@ class MultidiagonalMatrixRowView
 
       IndexType rowIdx;
 
+      DiagonalsShiftsView diagonalsShifts;
+
       ValuesViewType values;
 
       Indexer indexer;
diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp
index 349fbe8ea..88aae3f15 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp
+++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp
@@ -13,58 +13,59 @@
 namespace TNL {
 namespace Matrices {   
 
-template< typename ValuesView, typename Indexer >
+template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
 __cuda_callable__
-MultidiagonalMatrixRowView< ValuesView, Indexer >::
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
 MultidiagonalMatrixRowView( const IndexType rowIdx,
-                          const ValuesViewType& values,
-                          const IndexerType& indexer )
-: rowIdx( rowIdx ), values( values ), indexer( indexer )
+                            const DiagonalsShiftsView& diagonalsShifts,
+                            const ValuesViewType& values,
+                            const IndexerType& indexer )
+: rowIdx( rowIdx ), diagonalsShifts( diagonalsShifts ), values( values ), indexer( indexer )
 {
 }
 
-template< typename ValuesView, typename Indexer >
+template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
 __cuda_callable__
 auto
-MultidiagonalMatrixRowView< ValuesView, Indexer >::
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
 getSize() const -> IndexType
 {
    return indexer.getRowSize();
 }
 
-template< typename ValuesView, typename Indexer >
+template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
 __cuda_callable__
 auto
-MultidiagonalMatrixRowView< ValuesView, Indexer >::
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
 getColumnIndex( const IndexType localIdx ) const -> const IndexType
 {
    TNL_ASSERT_GE( localIdx, 0, "" );
-   TNL_ASSERT_LT( localIdx, 3, "" );
-   return rowIdx + localIdx - 1;
+   TNL_ASSERT_LT( localIdx, indexer.getDiagonals(), "" );
+   return rowIdx + diagonalsShifts[ localIdx ];
 }
 
-template< typename ValuesView, typename Indexer >
+template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
 __cuda_callable__
 auto
-MultidiagonalMatrixRowView< ValuesView, Indexer >::
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
 getValue( const IndexType localIdx ) const -> const RealType&
 {
    return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ];
 }
 
-template< typename ValuesView, typename Indexer >
+template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
 __cuda_callable__
 auto
-MultidiagonalMatrixRowView< ValuesView, Indexer >::
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
 getValue( const IndexType localIdx ) -> RealType&
 {
    return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ];
 }
 
-template< typename ValuesView, typename Indexer >
+template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
 __cuda_callable__
 void 
-MultidiagonalMatrixRowView< ValuesView, Indexer >::
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
 setElement( const IndexType localIdx,
             const RealType& value )
 {
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.h b/src/TNL/Matrices/MultidiagonalMatrixView.h
index addeb18b3..3d33ac0ae 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixView.h
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.h
@@ -38,7 +38,7 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
       using ValuesViewType = typename BaseType::ValuesView;
       using ViewType = MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >;
       using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
-      using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType >;
+      using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsShiftsView >;
 
       // TODO: remove this - it is here only for compatibility with original matrix implementation
       typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
@@ -55,6 +55,7 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
 
       MultidiagonalMatrixView( const ValuesViewType& values,
                                const DiagonalsShiftsView& diagonalsShifts,
+                               const HostDiagonalsShiftsView& hostDiagonalsShifts,
                                const IndexerType& indexer );
 
       ViewType getView();
@@ -92,11 +93,11 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
 
       void setValue( const RealType& v );
 
-      bool setElement( const IndexType row,
+      void setElement( const IndexType row,
                        const IndexType column,
                        const RealType& value );
 
-      bool addElement( const IndexType row,
+      void addElement( const IndexType row,
                        const IndexType column,
                        const RealType& value,
                        const RealType& thisElementMultiplicator = 1.0 );
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
index 3d9b0237f..1ba8dc34d 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
@@ -33,9 +33,11 @@ template< typename Real,
 MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 MultidiagonalMatrixView( const ValuesViewType& values,
                          const DiagonalsShiftsView& diagonalsShifts,
+                         const HostDiagonalsShiftsView& hostDiagonalsShifts,
                          const IndexerType& indexer )
 : MatrixView< Real, Device, Index >( indexer.getRows(), indexer.getColumns(), values ),
   diagonalsShifts( diagonalsShifts ),
+  hostDiagonalsShifts( hostDiagonalsShifts ),
   indexer( indexer )
 {
 }
@@ -48,7 +50,10 @@ auto
 MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 getView() -> ViewType
 {
-   return ViewType( this->values.getView(), indexer );
+   return ViewType( const_cast< MultidiagonalMatrixView* >( this )->values.getView(),
+                    const_cast< MultidiagonalMatrixView* >( this )->diagonalsShifts.getView(),
+                    const_cast< MultidiagonalMatrixView* >( this )->hostDiagonalsShifts.getView(),
+                    indexer );
 }
 
 template< typename Real,
@@ -59,7 +64,10 @@ auto
 MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 getConstView() const -> ConstViewType
 {
-   return ConstViewType( this->values.getConstView(), indexer );
+   return ConstViewType( this->values.getConstView(),
+                         this->diagonalsShifts.getConstView(),
+                         this->hostDiagonalsShifts.getConstView(),
+                         indexer );
 }
 
 template< typename Real,
@@ -208,7 +216,11 @@ void
 MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 setValue( const RealType& v )
 {
-   this->values = v;
+   const RealType newValue = v;
+   auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType columnIdx, RealType& value ) mutable {
+      value = newValue;
+   };
+   this->forAllRows( f );
 }
 
 template< typename Real,
@@ -220,7 +232,7 @@ auto
 MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 getRow( const IndexType& rowIdx ) const -> const RowView
 {
-   return RowView( rowIdx, this->values.getView(), this->indexer );
+   return RowView( rowIdx, this->diagonalsShifts.getView(), this->values.getView(), this->indexer );
 }
 
 template< typename Real,
@@ -232,14 +244,14 @@ auto
 MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 getRow( const IndexType& rowIdx ) -> RowView
 {
-   return RowView( rowIdx, this->values.getView(), this->indexer );
+   return RowView( rowIdx, this->diagonalsShifts.getView(), this->values.getView(), this->indexer );
 }
 
 template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder >
-bool
+void
 MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 setElement( const IndexType row, const IndexType column, const RealType& value )
 {
@@ -247,21 +259,26 @@ setElement( const IndexType row, const IndexType column, const RealType& value )
    TNL_ASSERT_LT( row, this->getRows(), "" );
    TNL_ASSERT_GE( column, 0, "" );
    TNL_ASSERT_LT( column, this->getColumns(), "" );
-   if( abs( row - column ) > 1 )
+
+   for( IndexType i = 0; i < hostDiagonalsShifts.getSize(); i++ )
+      if( row + hostDiagonalsShifts[ i ] == column )
+      {
+         this->values.setElement( this->getElementIndex( row, i ), value );
+         return;
+      }
+   if( value != 0.0 )
    {
       std::stringstream msg;
-      msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in tridiagonal matrix.";
+      msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in multidiagonal matrix.";
       throw std::logic_error( msg.str() );
    }
-   this->values.setElement( this->getElementIndex( row, column ), value );
-   return true;
 }
 
 template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder >
-bool
+void
 MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 addElement( const IndexType row,
             const IndexType column,
@@ -272,15 +289,20 @@ addElement( const IndexType row,
    TNL_ASSERT_LT( row, this->getRows(), "" );
    TNL_ASSERT_GE( column, 0, "" );
    TNL_ASSERT_LT( column, this->getColumns(), "" );
-   if( abs( row - column ) > 1 )
+
+   for( IndexType i = 0; i < hostDiagonalsShifts.getSize(); i++ )
+      if( row + hostDiagonalsShifts[ i ] == column )
+      {
+         const Index idx = this->getElementIndex( row, i );
+         this->values.setElement( idx, thisElementMultiplicator * this->values.getElement( idx ) + value );
+         return;
+      }
+   if( value != 0.0 )
    {
       std::stringstream msg;
-      msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in tridiagonal matrix.";
+      msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in multidiagonal matrix.";
       throw std::logic_error( msg.str() );
    }
-   const Index i = this->getElementIndex( row, column );
-   this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value );
-   return true;
 }
 
 template< typename Real,
@@ -296,9 +318,10 @@ getElement( const IndexType row, const IndexType column ) const
    TNL_ASSERT_GE( column, 0, "" );
    TNL_ASSERT_LT( column, this->getColumns(), "" );
 
-   if( abs( column - row ) > 1 )
-      return 0.0;
-   return this->values.getElement( this->getElementIndex( row, column ) );
+   for( IndexType i = 0; i < hostDiagonalsShifts.getSize(); i++ )
+      if( row + hostDiagonalsShifts[ i ] == column )
+         return this->values.getElement( this->getElementIndex( row, i ) );
+   return 0.0;
 }
 
 template< typename Real,
@@ -326,35 +349,20 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
 {
    using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) );
    const auto values_view = this->values.getConstView();
+   const auto diagonalsShifts_view = this->diagonalsShifts.getConstView();
+   const IndexType diagonalsCount = this->diagonalsShifts.getSize();
+   const IndexType columns = this->getColumns();
    const auto indexer = this->indexer;
    const auto zero = zero_;
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
       Real_ sum( zero );
-      if( rowIdx == 0 )
-      {
-         reduce( sum, fetch( 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ) );
-         reduce( sum, fetch( 0, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ) );
-         keep( 0, sum );
-         return;
-      }
-      if( rowIdx + 1 < indexer.getColumns() )
-      {
-         reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
-         reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
-         reduce( sum, fetch( rowIdx, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ) );
-         keep( rowIdx, sum );
-         return;
-      }
-      if( rowIdx < indexer.getColumns() )
+      for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ )
       {
-         reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
-         reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
-         keep( rowIdx, sum );
-      }
-      else
-      {
-         keep( rowIdx, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
+         const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ];
+         if( columnIdx >= 0 && columnIdx < columns )
+            reduce( sum, fetch( rowIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ] ) );
       }
+      keep( rowIdx, sum );
    };
    Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
@@ -368,7 +376,7 @@ void
 MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   this->rowsReduction( 0, this->indexer.getNonEmptyRowsCount(), fetch, reduce, keep, zero );
+   this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -381,26 +389,17 @@ MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 forRows( IndexType first, IndexType last, Function& function ) const
 {
    const auto values_view = this->values.getConstView();
+   const auto diagonalsShifts_view = this->diagonalsShifts.getConstView();
+   const IndexType diagonalsCount = this->diagonalsShifts.getSize();
+   const IndexType columns = this->getColumns();
    const auto indexer = this->indexer;
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      if( rowIdx == 0 )
+      for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ )
       {
-         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] );
-         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] );
-      } 
-      else if( rowIdx + 1 < indexer.getColumns() )
-      {
-         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
-         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
-         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] );
+         const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ];
+         if( columnIdx >= 0 && columnIdx < columns )
+            function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx, 0 ) ] );
       }
-      else if( rowIdx < indexer.getColumns() )
-      {
-         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
-         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
-      }
-      else
-         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
    };
    Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
@@ -415,26 +414,17 @@ MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 forRows( IndexType first, IndexType last, Function& function )
 {
    auto values_view = this->values.getView();
+   const auto diagonalsShifts_view = this->diagonalsShifts.getConstView();
+   const IndexType diagonalsCount = this->diagonalsShifts.getSize();
+   const IndexType columns = this->getColumns();
    const auto indexer = this->indexer;
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      if( rowIdx == 0 )
-      {
-         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] );
-         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] );
-      } 
-      else if( rowIdx + 1 < indexer.getColumns() )
-      {
-         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
-         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
-         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] );
-      }
-      else if( rowIdx < indexer.getColumns() )
+      for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ )
       {
-         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
-         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
+         const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ];
+         if( columnIdx >= 0 && columnIdx < columns )
+            function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ] );
       }
-      else
-         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
    };
    Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
@@ -460,7 +450,7 @@ void
 MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 forAllRows( Function& function )
 {
-   this->forRows( 0, this->indexer.getNonEmptyRowsCount(), function );
+   this->forRows( 0, this->indexer.getNonemptyRowsCount(), function );
 }
 
 template< typename Real,
@@ -517,7 +507,7 @@ addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_
    TNL_ASSERT_EQ( this->getRows(), matrix.getRows(), "Matrices rows are not equal." );
    TNL_ASSERT_EQ( this->getColumns(), matrix.getColumns(), "Matrices columns are not equal." );
 
-   if( RowMajorOrder == RowMajorOrder_ )
+   /*if( RowMajorOrder == RowMajorOrder_ )
    {
       if( thisMatrixMultiplicator == 1.0 )
          this->values += matrixMultiplicator * matrix.getValues();
@@ -544,7 +534,7 @@ addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_
          this->forAllRows( add1 );
       else
          this->forAllRows( addGen );
-   }
+   }*/
 }
 
 #ifdef HAVE_CUDA
@@ -673,12 +663,19 @@ template< typename Real,
           bool RowMajorOrder >
 void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const
 {
-   for( IndexType row = 0; row < this->getRows(); row++ )
+   for( IndexType rowIdx = 0; rowIdx < this->getRows(); rowIdx++ )
    {
-      str <<"Row: " << row << " -> ";
-      for( IndexType column = row - 1; column < row + 2; column++ )
-         if( column >= 0 && column < this->columns )
-            str << " Col:" << column << "->" << this->getElement( row, column ) << "\t";
+      str <<"Row: " << rowIdx << " -> ";
+      for( IndexType localIdx = 0; localIdx < this->hostDiagonalsShifts.getSize(); localIdx++ )
+      {
+         const IndexType columnIdx = rowIdx + this->hostDiagonalsShifts[ localIdx ];
+         if( columnIdx >= 0 && columnIdx < this->columns )
+         {
+            auto v = this->values.getElement( this->indexer.getGlobalIndex( rowIdx, localIdx ) );
+            if( v )
+               str << " Col:" << columnIdx << "->" << v  << "\t";
+         }
+      }
       str << std::endl;
    }
 }
@@ -713,15 +710,8 @@ template< typename Real,
           bool RowMajorOrder >
 __cuda_callable__
 Index MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
-getElementIndex( const IndexType row, const IndexType column ) const
+getElementIndex( const IndexType row, const IndexType localIdx ) const
 {
-   IndexType localIdx = column - row;
-   if( row > 0 )
-      localIdx++;
-
-   TNL_ASSERT_GE( localIdx, 0, "" );
-   TNL_ASSERT_LT( localIdx, 3, "" );
-
    return this->indexer.getGlobalIndex( row, localIdx );
 }
 
diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h
index e7e3ab6b2..82549e744 100644
--- a/src/TNL/Matrices/Tridiagonal.h
+++ b/src/TNL/Matrices/Tridiagonal.h
@@ -97,11 +97,11 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
 
       void setValue( const RealType& v );
 
-      bool setElement( const IndexType row,
+      void setElement( const IndexType row,
                        const IndexType column,
                        const RealType& value );
 
-      bool addElement( const IndexType row,
+      void addElement( const IndexType row,
                        const IndexType column,
                        const RealType& value,
                        const RealType& thisElementMultiplicator = 1.0 );
diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp
index 6c09238ff..41d722c6a 100644
--- a/src/TNL/Matrices/Tridiagonal.hpp
+++ b/src/TNL/Matrices/Tridiagonal.hpp
@@ -285,11 +285,11 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-bool
+void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 setElement( const IndexType row, const IndexType column, const RealType& value )
 {
-   return this->view.setElement( row, column, value );
+   this->view.setElement( row, column, value );
 }
 
 template< typename Real,
@@ -297,14 +297,14 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-bool
+void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 addElement( const IndexType row,
             const IndexType column,
             const RealType& value,
             const RealType& thisElementMultiplicator )
 {
-   return this->view.addElement( row, column, value, thisElementMultiplicator );
+   this->view.addElement( row, column, value, thisElementMultiplicator );
 }
 
 template< typename Real,
@@ -645,14 +645,7 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 print( std::ostream& str ) const
 {
-   for( IndexType row = 0; row < this->getRows(); row++ )
-   {
-      str <<"Row: " << row << " -> ";
-      for( IndexType column = row - 1; column < row + 2; column++ )
-         if( column >= 0 && column < this->columns )
-            str << " Col:" << column << "->" << this->getElement( row, column ) << "\t";
-      str << std::endl;
-   }
+   this->view.print( str );
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h
index 128b48494..7db517cbd 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.h
+++ b/src/TNL/Matrices/TridiagonalMatrixView.h
@@ -81,11 +81,11 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
 
       void setValue( const RealType& v );
 
-      bool setElement( const IndexType row,
+      void setElement( const IndexType row,
                        const IndexType column,
                        const RealType& value );
 
-      bool addElement( const IndexType row,
+      void addElement( const IndexType row,
                        const IndexType column,
                        const RealType& value,
                        const RealType& thisElementMultiplicator = 1.0 );
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp
index 4d4950c4e..e851d2a1f 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp
@@ -213,7 +213,7 @@ template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder >
-bool
+void
 TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 setElement( const IndexType row, const IndexType column, const RealType& value )
 {
@@ -235,7 +235,7 @@ template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder >
-bool
+void
 TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 addElement( const IndexType row,
             const IndexType column,
@@ -638,7 +638,11 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::os
       str <<"Row: " << row << " -> ";
       for( IndexType column = row - 1; column < row + 2; column++ )
          if( column >= 0 && column < this->columns )
-            str << " Col:" << column << "->" << this->getElement( row, column ) << "\t";
+         {
+            auto v = this->getElement( row, column );
+            if( v )
+               str << " Col:" << column << "->" << v << "\t";
+         }
       str << std::endl;
    }
 }
diff --git a/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h b/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h
index 0f0436d74..3597c30f7 100644
--- a/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h
+++ b/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h
@@ -77,6 +77,9 @@ class MultidiagonalMatrixIndexer
       __cuda_callable__
       const IndexType& getColumns() const { return this->columns; };
 
+      __cuda_callable__
+      const IndexType& getDiagonals() const { return this->diagonals; };
+
       __cuda_callable__
       const IndexType& getNonemptyRowsCount() const { return this->nonemptyRows; };
 
diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
index cb9916e4c..514ea39e0 100644
--- a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
+++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
@@ -137,7 +137,6 @@ void test_GetNonemptyRowsCount()
    Matrix m3( 8, 5, DiagonalsShiftsType({ -2, 0, 3, 5 }) );
    m3.setValue( 1.0 );
    EXPECT_EQ( m3.getNonemptyRowsCount(), 7 );
-
 }
 
 template< typename Matrix >
@@ -148,87 +147,53 @@ void test_GetCompressedRowLengths()
    using IndexType = typename Matrix::IndexType;
    using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
 
-   const IndexType rows = 10;
-   const IndexType cols = 11;
-
-   Matrix m( rows, cols );
-
-   // Insert values into the rows.
-   RealType value = 1;
-
-   for( IndexType i = 0; i < 2; i++ )  // 0th row -> 2 elements
-      m.setElement( 0, i, value++ );
-
-   for( IndexType i = 0; i < 3; i++ )  // 1st row -> 3 elements
-      m.setElement( 1, i, value++ );
-
-   for( IndexType i = 1; i < 3; i++ )  // 2nd row -> 2 elements
-      m.setElement( 2, i, value++ );
-
-   for( IndexType i = 2; i < 5; i++ )  // 3rd row -> 3 elements
-      m.setElement( 3, i, value++ );
-
-   for( IndexType i = 3; i < 6; i++ )  // 4th row -> 3 elements
-      m.setElement( 4, i, value++ );
-
-   for( IndexType i = 4; i < 6; i++ )  // 5th row -> 2 elements
-      m.setElement( 5, i, value++ );
-
-   for( IndexType i = 5; i < 8; i++ )  // 6th row -> 3 elements
-      m.setElement( 6, i, value++ );
-
-   for( IndexType i = 6; i < 8; i++ )  // 7th row -> 2 elements
-      m.setElement( 7, i, value++ );
-
-   for( IndexType i = 7; i < 10; i++ ) // 8th row -> 3 elements
-      m.setElement( 8, i, value++ );
+   /*
+    * Sets up the following 8x8 matrix:
+    *
+    *    /  0  0  0  1  0  1  0  0 \  -> 2
+    *    |  0  1  0  0  1  0  1  0 |  -> 3
+    *    |  1  0  1  0  0  1  0  1 |  -> 4
+    *    |  0  1  0  1  0  0  1  0 |  -> 3
+    *    |  0  0  1  0  1  0  0  1 |  -> 3
+    *    |  0  0  0  1  0  1  0  0 |  -> 2
+    *    |  0  0  0  0  1  0  1  0 |  -> 2
+    *    \  0  0  0  0  0  1  0  0 /  -> 1
+    */
+   
+   const IndexType rows = 8;
+   const IndexType cols = 8;
 
-   for( IndexType i = 8; i < 11; i++ ) // 9th row -> 3 elements
-      m.setElement( 9, i, value++ );
+   Matrix m( rows, cols, DiagonalsShiftsType({ -2, 0, 3, 5 }) );
+   m.setValue( 1.0 );
+   m.setElement( 0, 0, 0.0 );
+   m.setElement( 7, 7, 0.0 );
 
    typename Matrix::CompressedRowLengthsVector rowLengths( rows );
    rowLengths = 0;
    m.getCompressedRowLengths( rowLengths );
-   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 2, 3, 2, 3, 3, 2, 3, 2, 3, 3 };
+   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 2, 3, 4, 3, 3, 2, 2, 1 };
    EXPECT_EQ( rowLengths, correctRowLengths );
 }
 
-template< typename Matrix >
-void test_GetRowLength()
-{
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
-
-   const IndexType rows = 8;
-   const IndexType cols = 7;
-
-   Matrix m( rows, cols );
-
-   EXPECT_EQ( m.getRowLength( 0 ), 2 );
-   EXPECT_EQ( m.getRowLength( 1 ), 3 );
-   EXPECT_EQ( m.getRowLength( 2 ), 3 );
-   EXPECT_EQ( m.getRowLength( 3 ), 3 );
-   EXPECT_EQ( m.getRowLength( 4 ), 3 );
-   EXPECT_EQ( m.getRowLength( 5 ), 3 );
-   EXPECT_EQ( m.getRowLength( 6 ), 2 );
-   EXPECT_EQ( m.getRowLength( 7 ), 1 );
-}
-
 template< typename Matrix >
 void test_GetAllocatedElementsCount()
 {
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
 
    const IndexType rows = 7;
    const IndexType cols = 6;
 
-   Matrix m( rows, cols );
+   Matrix m1( 7, 6, DiagonalsShiftsType( { -2, 0, 3, 5 } ) );
+   EXPECT_EQ( m1.getAllocatedElementsCount(), 28 );
+
+   Matrix m2( 8, 6, DiagonalsShiftsType( { -2, 0, 3, 5 } ) );
+   EXPECT_EQ( m2.getAllocatedElementsCount(), 32 );
 
-   EXPECT_EQ( m.getAllocatedElementsCount(), 21 );
+   Matrix m3( 9, 6, DiagonalsShiftsType( { -2, 0, 3, 5 } ) );
+   EXPECT_EQ( m3.getAllocatedElementsCount(), 32 );
 }
 
 template< typename Matrix >
@@ -237,29 +202,27 @@ void test_GetNumberOfNonzeroMatrixElements()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
 
    /*
     * Sets up the following 7x6 matrix:
     *
-    *    /  0  1  0  0  0  0 \
-    *    |  2  3  4  0  0  0 |
-    *    |  0  5  6  7  0  0 |
-    *    |  0  0  8  9 10  0 |
-    *    |  0  0  0 11 12 13 |
-    *    |  0  0  0  0 14  0 |
-    *    \  0  0  0  0  0 16 /
+    *    /  0  0  1  0  1  0 \ -> 2
+    *    |  0  1  0  1  0  1 | -> 3
+    *    |  0  0  1  0  1  0 | -> 2
+    *    |  1  0  0  1  0  1 | -> 3
+    *    |  0  1  0  0  1  0 | -> 2
+    *    |  0  0  1  0  0  1 | -> 2
+    *    \  0  0  0  1  0  0 / -> 1
+    *                           ----
+    *                            15
     */
    const IndexType rows = 7;
    const IndexType cols = 6;
 
-   Matrix m( rows, cols );
-
-   RealType value = 0;
-   for( IndexType i = 0; i < rows; i++ )
-      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( cols, i + 2 ); j++ )
-         m.setElement( i, j, value++ );
-
-   m.setElement( 5, 5, 0);
+   Matrix m( rows, cols, DiagonalsShiftsType( { -3, 0, 2, 4 } ) );
+   m.setValue( 1.0 );
+   m.setElement( 0, 0, 0.0 );
 
    EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 15 );
 }
@@ -270,6 +233,7 @@ void test_Reset()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
 
    /*
     * Sets up the following 5x4 matrix:
@@ -283,8 +247,7 @@ void test_Reset()
    const IndexType rows = 5;
    const IndexType cols = 4;
 
-   Matrix m( rows, cols );
-
+   Matrix m( rows, cols, DiagonalsShiftsType( { 0, 1, 2, 4 } ) );
    m.reset();
 
    EXPECT_EQ( m.getRows(), 0 );
@@ -297,130 +260,73 @@ void test_SetValue()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
 
    /*
     * Sets up the following 7x6 matrix:
     *
-    *    /  0  1  0  0  0  0 \
-    *    |  2  3  4  0  0  0 |
-    *    |  0  5  6  7  0  0 |
-    *    |  0  0  8  9 10  0 |
-    *    |  0  0  0 11 12 13 |
-    *    |  0  0  0  0 14  0 |
-    *    \  0  0  0  0  0 16 /
+    *    /  1  0  1  0  1  0 \
+    *    |  0  1  0  1  0  1 |
+    *    |  0  0  1  0  1  0 |
+    *    |  1  0  0  1  0  1 |
+    *    |  0  1  0  0  1  0 |
+    *    |  0  0  1  0  0  1 |
+    *    \  0  0  0  1  0  0 /
     */
    const IndexType rows = 7;
    const IndexType cols = 6;
 
-   Matrix m( rows, cols );
-
-   RealType value = 0;
-   for( IndexType i = 0; i < rows; i++ )
-      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( cols, i + 2 ); j++ )
-         m.setElement( i, j, value++ );
-
-   m.setElement( 5, 5, 0);
-
-   EXPECT_EQ( m.getElement( 0, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 1 ),  1 );
-   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
-
-   EXPECT_EQ( m.getElement( 1, 0 ),  2 );
-   EXPECT_EQ( m.getElement( 1, 1 ),  3 );
-   EXPECT_EQ( m.getElement( 1, 2 ),  4 );
-   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
-
-   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 1 ),  5 );
-   EXPECT_EQ( m.getElement( 2, 2 ),  6 );
-   EXPECT_EQ( m.getElement( 2, 3 ),  7 );
-   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
-
-   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 2 ),  8 );
-   EXPECT_EQ( m.getElement( 3, 3 ),  9 );
-   EXPECT_EQ( m.getElement( 3, 4 ), 10 );
-   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
-
-   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 3 ), 11 );
-   EXPECT_EQ( m.getElement( 4, 4 ), 12 );
-   EXPECT_EQ( m.getElement( 4, 5 ), 13 );
-
-   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 4 ), 14 );
-   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
-
-   EXPECT_EQ( m.getElement( 6, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 5 ), 16 );
-
-   // Set the values of all elements to a certain number
-   m.setValue( 42 );
-
-   EXPECT_EQ( m.getElement( 0, 0 ), 42 );
-   EXPECT_EQ( m.getElement( 0, 1 ), 42 );
-   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
-
-   EXPECT_EQ( m.getElement( 1, 0 ), 42 );
-   EXPECT_EQ( m.getElement( 1, 1 ), 42 );
-   EXPECT_EQ( m.getElement( 1, 2 ), 42 );
-   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
-
-   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 1 ), 42 );
-   EXPECT_EQ( m.getElement( 2, 2 ), 42 );
-   EXPECT_EQ( m.getElement( 2, 3 ), 42 );
-   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
-
-   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 2 ), 42 );
-   EXPECT_EQ( m.getElement( 3, 3 ), 42 );
-   EXPECT_EQ( m.getElement( 3, 4 ), 42 );
-   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
-
-   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 3 ), 42 );
-   EXPECT_EQ( m.getElement( 4, 4 ), 42 );
-   EXPECT_EQ( m.getElement( 4, 5 ), 42 );
-
-   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 4 ), 42 );
-   EXPECT_EQ( m.getElement( 5, 5 ), 42 );
-
-   EXPECT_EQ( m.getElement( 6, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 5 ), 42 );
+   Matrix m( rows, cols, DiagonalsShiftsType( { -3, 0, 2, 4 } ) );
+   m.setValue( 1.0 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 5 ), 1 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 5 ), 1 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 5 ), 1 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 6, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 0 );
 }
 
 template< typename Matrix >
@@ -429,61 +335,70 @@ void test_SetElement()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
 
    /*
     * Sets up the following 5x5 matrix:
     *
-    *    /  1  2  0  0  0 \
-    *    |  6  7  8  0  0 |
-    *    |  0 12 13 14  0 |
-    *    |  0  0 18 19 20 |
-    *    \  0  0  0 24 25 /
+    *    /  1  2  0  0  5 \
+    *    |  0  7  8  0  0 |
+    *    |  0  0 13 14  0 |
+    *    | 16  0  0 19 20 |
+    *    \  0 22  0  0 25 /
     */
    const IndexType rows = 5;
    const IndexType cols = 5;
-
-   Matrix m( rows, cols );
+   DiagonalsShiftsType diagonals{-3, 0, 1, 4 };
+   Matrix m( rows, cols, diagonals );
 
    RealType value = 1;
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < cols; j++ )
       {
-         if( abs( i - j ) > 1 )
+         bool found( false );
+         for( IndexType k = 0; k < diagonals.getSize(); k++ )
+         {
+            if( i + diagonals[ k ] == j )
+            {
+               m.setElement( i, j, value++ );
+               found = true;
+               break;
+            }
+         }
+         if( ! found )
          {
             EXPECT_THROW( m.setElement( i, j, value++ ), std::logic_error );
          }
-         else
-            m.setElement( i, j, value++ );
       }
 
    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
    EXPECT_EQ( m.getElement( 0, 2 ),  0 );
    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  5 );
 
-   EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
    EXPECT_EQ( m.getElement( 1, 1 ),  7 );
    EXPECT_EQ( m.getElement( 1, 2 ),  8 );
    EXPECT_EQ( m.getElement( 1, 3 ),  0 );
    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
 
    EXPECT_EQ( m.getElement( 2, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
    EXPECT_EQ( m.getElement( 2, 2 ), 13 );
    EXPECT_EQ( m.getElement( 2, 3 ), 14 );
    EXPECT_EQ( m.getElement( 2, 4 ),  0 );
 
-   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
    EXPECT_EQ( m.getElement( 3, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  0 );
    EXPECT_EQ( m.getElement( 3, 3 ), 19 );
    EXPECT_EQ( m.getElement( 3, 4 ), 20 );
 
    EXPECT_EQ( m.getElement( 4, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 22 );
    EXPECT_EQ( m.getElement( 4, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
    EXPECT_EQ( m.getElement( 4, 4 ), 25 );
 }
 
@@ -493,123 +408,137 @@ void test_AddElement()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
 
    /*
-    * Sets up the following 6x5 matrix:
+    * Sets up the following 5x5 matrix:
     *
-    *    /  1  2  0  0  0 \
-    *    |  6  7  8  0  0 |
-    *    |  0 12 13 14  0 |
-    *    |  0  0 18 19 20 |
-    *    |  0  0  0 24 25 |
-    *    \  0  0  0  0 30 /
+    *    /  1  2  0  0  5 \
+    *    |  0  7  8  0  0 |
+    *    |  0  0 13 14  0 |
+    *    |  0  0  0 19 20 |
+    *    \  0  0  0  0 25 /
     */
-
-   const IndexType rows = 6;
+   const IndexType rows = 5;
    const IndexType cols = 5;
+   DiagonalsShiftsType diagonals{-3, 0, 1, 4 };
+   Matrix m( rows, cols, diagonals );
 
-   Matrix m( rows, cols );
-
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++ )
-        {
-           if( abs( i - j ) <= 1 )
-               m.setElement( i, j, value );
-           value++;
-        }
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+      {
+         bool found( false );
+         for( IndexType k = 0; k < diagonals.getSize(); k++ )
+         {
+            if( i + diagonals[ k ] == j )
+            {
+               if( j >= i )
+                  m.setElement( i, j, value++ );
+               else value++;
+               found = true;
+               break;
+            }
+         }
+         if( ! found )
+         {
+            EXPECT_THROW( m.setElement( i, j, value++ ), std::logic_error );
+         }
+      }
 
    // Check the added elements
    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
    EXPECT_EQ( m.getElement( 0, 2 ),  0 );
    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  5 );
 
-   EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
    EXPECT_EQ( m.getElement( 1, 1 ),  7 );
    EXPECT_EQ( m.getElement( 1, 2 ),  8 );
    EXPECT_EQ( m.getElement( 1, 3 ),  0 );
    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
 
    EXPECT_EQ( m.getElement( 2, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
    EXPECT_EQ( m.getElement( 2, 2 ), 13 );
    EXPECT_EQ( m.getElement( 2, 3 ), 14 );
    EXPECT_EQ( m.getElement( 2, 4 ),  0 );
 
    EXPECT_EQ( m.getElement( 3, 0 ),  0 );
    EXPECT_EQ( m.getElement( 3, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  0 );
    EXPECT_EQ( m.getElement( 3, 3 ), 19 );
    EXPECT_EQ( m.getElement( 3, 4 ), 20 );
 
    EXPECT_EQ( m.getElement( 4, 0 ),  0 );
    EXPECT_EQ( m.getElement( 4, 1 ),  0 );
    EXPECT_EQ( m.getElement( 4, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
    EXPECT_EQ( m.getElement( 4, 4 ), 25 );
 
-   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 4 ), 30 );
-
    // Add new elements to the old elements with a multiplying factor applied to the old elements.
    /*
     * The following setup results in the following 6x5 matrix:
     *
-    *     /  1  2  0  0  0 \    /  1  2  0  0  0 \   /  3  6  0  0  0 \
-    *     |  6  7  8  0  0 |    |  3  4  5  0  0 |   | 15 18 21  0  0 |
-    * 2 * |  0 12 13 14  0 |  + |  0  6  7  8  0 | = |  0 30 33 36  0 |
-    *     |  0  0 18 19 20 |    |  0  0  9 10 11 |   |  0  0 45 48 51 |
-    *     |  0  0  0 24 25 |    |  0  0  0 12 13 |   |  0  0  0 60 63 |
-    *     \  0  0  0  0 30 /    \  0  0  0  0 14 /   \  0  0  0  0 74 /
+    *     /  1  2  0  0  5 \   /  1  2  0  0  5 \    /  3  6  0  0 15 \
+    *     |  0  7  8  0  0 |   |  0  7  8  0  0 |    |  0 21 24  0  0 |
+    * 2 * |  0  0 13 14  0 | + |  0  0 13 14  0 | =  |  0  0 39 42  0 |
+    *     |  0  0  0 19 20 |   | 16  0  0 19 20 |    | 16  0  0 57 60 |
+    *     \  0  0  0  0 25 /   \  0 22  0  0 25 /    \  0 22  0  0 75 /
+    *     
     */
 
-   RealType newValue = 1;
+   value = 1;
    RealType multiplicator = 2;
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < cols; j++ )
-         if( abs( i - j ) <= 1 )
-            m.addElement( i, j, newValue++, multiplicator );
+      {
+         bool found( false );
+         for( IndexType k = 0; k < diagonals.getSize(); k++ )
+         {
+            if( i + diagonals[ k ] == j )
+            {
+               m.addElement( i, j, value++, multiplicator );
+               found = true;
+               break;
+            }
+         }
+         if( ! found )
+         {
+            EXPECT_THROW( m.addElement( i, j, value++, multiplicator ), std::logic_error );
+         }
+      }
 
    EXPECT_EQ( m.getElement( 0, 0 ),  3 );
    EXPECT_EQ( m.getElement( 0, 1 ),  6 );
    EXPECT_EQ( m.getElement( 0, 2 ),  0 );
    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ), 15 );
 
-   EXPECT_EQ( m.getElement( 1, 0 ), 15 );
-   EXPECT_EQ( m.getElement( 1, 1 ), 18 );
-   EXPECT_EQ( m.getElement( 1, 2 ), 21 );
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 21 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 24 );
    EXPECT_EQ( m.getElement( 1, 3 ),  0 );
    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
 
    EXPECT_EQ( m.getElement( 2, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 1 ), 30 );
-   EXPECT_EQ( m.getElement( 2, 2 ), 33 );
-   EXPECT_EQ( m.getElement( 2, 3 ), 36 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 39 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 42 );
    EXPECT_EQ( m.getElement( 2, 4 ),  0 );
 
-   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
    EXPECT_EQ( m.getElement( 3, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 2 ), 45  );
-   EXPECT_EQ( m.getElement( 3, 3 ), 48 );
-   EXPECT_EQ( m.getElement( 3, 4 ), 51 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 57 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 60 );
 
    EXPECT_EQ( m.getElement( 4, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 22 );
    EXPECT_EQ( m.getElement( 4, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 3 ), 60 );
-   EXPECT_EQ( m.getElement( 4, 4 ), 63 );
-
-   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 4 ), 74 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 75 );
 }
 
 template< typename Matrix >
@@ -618,58 +547,75 @@ void test_SetRow()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
 
    /*
-    * Sets up the following 3x7 matrix:
+    * Sets up the following 5x7 matrix:
     *
-    *    /  1  2  0  0  0  0  0 \
-    *    |  8  9 10  0  0  0  0 |
-    *    \  0 16 17 18  0  0  0 /
+    *    /  1  0  2  0  3  0  0 \
+    *    |  4  5  0  6  0  7  0 |
+    *    |  0  8  9  0 10  0 11 |
+    *    |  0  0 12 13  0 14  0 |
+    *    \  0  0  0 15 16  0 17 /
     */
-   const IndexType rows = 3;
+   const IndexType rows = 5;
    const IndexType cols = 7;
 
-   Matrix m( rows, cols );
+   Matrix m( rows, cols, DiagonalsShiftsType({ -1, 0, 2, 4 }) );
 
    auto matrix_view = m.getView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      RealType values[ 3 ][ 3 ] {
-         {  1,  2,  0 },
-         {  8,  9, 10 },
-         { 16, 17, 18 } };
+      RealType values[ 5 ][ 4 ] {
+         {  0,  1,  2,  3 },
+         {  4,  5,  6,  7 },
+         {  8,  9, 10, 11 },
+         { 12, 13, 14,  0 },
+         { 15, 16, 17,  0 } };
       auto row = matrix_view.getRow( rowIdx );
-      for( IndexType i = 0; i < 3; i++ )
-      {
-         if( rowIdx == 0 && i > 1 )
-            break;
+      for( IndexType i = 0; i < 4; i++ )
          row.setElement( i, values[ rowIdx ][ i ] );
-      }
    };
-   TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f );
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType) 0, rows, f );
 
    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
-   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
-   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  2 );
    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  3 );
    EXPECT_EQ( m.getElement( 0, 5 ),  0 );
    EXPECT_EQ( m.getElement( 0, 6 ),  0 );
 
-   EXPECT_EQ( m.getElement( 1, 0 ),  8 );
-   EXPECT_EQ( m.getElement( 1, 1 ),  9 );
-   EXPECT_EQ( m.getElement( 1, 2 ), 10 );
-   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 0 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  6 );
    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  7 );
    EXPECT_EQ( m.getElement( 1, 6 ),  0 );
 
    EXPECT_EQ( m.getElement( 2, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 1 ), 16 );
-   EXPECT_EQ( m.getElement( 2, 2 ), 17 );
-   EXPECT_EQ( m.getElement( 2, 3 ), 18 );
-   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  9 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 10 );
    EXPECT_EQ( m.getElement( 2, 5 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 11 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 12 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 13 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 5 ), 14 );
+   EXPECT_EQ( m.getElement( 3, 6 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 15 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 16 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 6 ), 17 );
 }
 
 template< typename Matrix >
@@ -678,27 +624,31 @@ void test_AddRow()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+
    /*
     * Sets up the following 6x5 matrix:
     *
-    *    /  1  2  0  0  0 \
-    *    |  6  7  8  0  0 |
-    *    |  0 12 13 14  0 |
-    *    |  0  0 18 19 20 |
-    *    |  0  0  0 24 25 |
-    *    \  0  0  0  0 30 /
+    *    /  1  2  3  0  0 \
+    *    |  0  7  8  9  0 |
+    *    |  0  0 13 14 15 |
+    *    |  0  0  0 19 20 |
+    *    |  0  0  0  0 25 |
+    *    \  0  0  0  0  0 /
     */
 
    const IndexType rows = 6;
    const IndexType cols = 5;
+   DiagonalsShiftsType diagonals( { -2, 0, 1, 2 } );
 
-   Matrix m( rows, cols );
+   Matrix m( rows, cols, diagonals );
 
    RealType value = 1;
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < cols; j++ )
       {
-         if( abs( i - j ) <= 1 )
+         IndexType offset = j - i;
+         if( diagonals.containsValue( offset ) && offset >= 0)
             m.setElement( i, j, value );
          value++;
       }
@@ -706,63 +656,63 @@ void test_AddRow()
    // Check the added elements
    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
-   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  3 );
    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
    EXPECT_EQ( m.getElement( 0, 4 ),  0 );
 
-   EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
    EXPECT_EQ( m.getElement( 1, 1 ),  7 );
    EXPECT_EQ( m.getElement( 1, 2 ),  8 );
-   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  9 );
    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
 
    EXPECT_EQ( m.getElement( 2, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
    EXPECT_EQ( m.getElement( 2, 2 ), 13 );
    EXPECT_EQ( m.getElement( 2, 3 ), 14 );
-   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 15 );
 
    EXPECT_EQ( m.getElement( 3, 0 ),  0 );
    EXPECT_EQ( m.getElement( 3, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  0 );
    EXPECT_EQ( m.getElement( 3, 3 ), 19 );
    EXPECT_EQ( m.getElement( 3, 4 ), 20 );
 
    EXPECT_EQ( m.getElement( 4, 0 ),  0 );
    EXPECT_EQ( m.getElement( 4, 1 ),  0 );
    EXPECT_EQ( m.getElement( 4, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
    EXPECT_EQ( m.getElement( 4, 4 ), 25 );
 
    EXPECT_EQ( m.getElement( 5, 0 ),  0 );
    EXPECT_EQ( m.getElement( 5, 1 ),  0 );
    EXPECT_EQ( m.getElement( 5, 2 ),  0 );
    EXPECT_EQ( m.getElement( 5, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 4 ), 30 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
 
    // Add new elements to the old elements with a multiplying factor applied to the old elements.
    /*
     * The following setup results in the following 6x5 sparse matrix:
     *
-    *  / 0  0  0  0  0  0 \   /  1  2  0  0  0 \   / 11 11  0  0  0 \   / 11  11  0   0   0 \
-    *  | 0  1  0  0  0  0 |   |  6  7  8  0  0 |   | 22 22 22  0  0 |   | 28  29 30   0   0 |
-    *  | 0  0  2  0  0  0 | * |  0 12 13 14  0 | + |  0 33 33 33  0 | = |  0  57 59  61   0 |
-    *  | 0  0  0  3  0  0 |   |  0  0 18 19 20 |   |  0  0 44 44 44 |   |  0   0 98 101 104 |
-    *  | 0  0  0  0  4  0 |   |  0  0  0 24 25 |   |  0  0  0 55 55 |   |  0   0  0 151 155 |
-    *  \ 0  0  0  0  0  5 /   \  0  0  0  0 30 /   \  0  0  0  0 66 /   \  0   0  0   0 216 /
+    *  / 0  0  0  0  0  0 \   /  1  2  3  0  0 \   / 11  0  0  0  0 \   / 11   0  0   0   0 \
+    *  | 0  1  0  0  0  0 |   |  0  7  8  9  0 |   |  0 22  0  0  0 |   |  0  29  8   9   0 |
+    *  | 0  0  2  0  0  0 | * |  0  0 13 14 15 | + | 33  0 33  0  0 | = | 33   0 59  28  30 |
+    *  | 0  0  0  3  0  0 |   |  0  0  0 19 20 |   |  0 44  0 44  0 |   |  0  44  0 101  60 |
+    *  | 0  0  0  0  4  0 |   |  0  0  0  0 25 |   |  0  0 55  0 55 |   |  0   0 55   0 155 |
+    *  \ 0  0  0  0  0  5 /   \  0  0  0  0  0 /   \  0  0  0 66  0 /   \  0   0  0  66   0 /
     */
 
    auto matrix_view = m.getView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      RealType values[ 6 ][ 3 ] {
-         { 11, 11,  0 },
-         { 22, 22, 22 },
-         { 33, 33, 33 },
-         { 44, 44, 44 },
-         { 55, 55, 55 },
-         { 66, 66, 66 } };
+      RealType values[ 6 ][ 4 ] {
+         {  0, 11, 0,  0 },
+         {  0, 22, 0,  0 },
+         { 33, 33, 0,  0 },
+         { 44, 44, 0,  0 },
+         { 55, 55, 0,  0 },
+         { 66,  0, 0,  0 } };
       auto row = matrix_view.getRow( rowIdx );
-      for( IndexType i = 0; i < 3; i++ )
+      for( IndexType i = 0; i < 4; i++ )
       {
          RealType& val = row.getValue( i );
          val = rowIdx * val + values[ rowIdx ][ i ];
@@ -770,42 +720,41 @@ void test_AddRow()
    };
    TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f );
 
-
    EXPECT_EQ( m.getElement( 0, 0 ),  11 );
-   EXPECT_EQ( m.getElement( 0, 1 ),  11 );
+   EXPECT_EQ( m.getElement( 0, 1 ),   0 );
    EXPECT_EQ( m.getElement( 0, 2 ),   0 );
    EXPECT_EQ( m.getElement( 0, 3 ),   0 );
    EXPECT_EQ( m.getElement( 0, 4 ),   0 );
 
-   EXPECT_EQ( m.getElement( 1, 0 ),  28 );
+   EXPECT_EQ( m.getElement( 1, 0 ),   0 );
    EXPECT_EQ( m.getElement( 1, 1 ),  29 );
-   EXPECT_EQ( m.getElement( 1, 2 ),  30 );
-   EXPECT_EQ( m.getElement( 1, 3 ),   0 );
+   EXPECT_EQ( m.getElement( 1, 2 ),   8 );
+   EXPECT_EQ( m.getElement( 1, 3 ),   9 );
    EXPECT_EQ( m.getElement( 1, 4 ),   0 );
 
-   EXPECT_EQ( m.getElement( 2, 0 ),   0 );
-   EXPECT_EQ( m.getElement( 2, 1 ),  57 );
+   EXPECT_EQ( m.getElement( 2, 0 ),  33 );
+   EXPECT_EQ( m.getElement( 2, 1 ),   0 );
    EXPECT_EQ( m.getElement( 2, 2 ),  59 );
-   EXPECT_EQ( m.getElement( 2, 3 ),  61 );
-   EXPECT_EQ( m.getElement( 2, 4 ),   0  );
+   EXPECT_EQ( m.getElement( 2, 3 ),  28 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  30  );
 
    EXPECT_EQ( m.getElement( 3, 0 ),   0 );
-   EXPECT_EQ( m.getElement( 3, 1 ),   0 );
-   EXPECT_EQ( m.getElement( 3, 2 ),  98 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  44 );
+   EXPECT_EQ( m.getElement( 3, 2 ),   0 );
    EXPECT_EQ( m.getElement( 3, 3 ), 101 );
-   EXPECT_EQ( m.getElement( 3, 4 ), 104 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  60 );
 
    EXPECT_EQ( m.getElement( 4, 0 ),   0 );
    EXPECT_EQ( m.getElement( 4, 1 ),   0 );
-   EXPECT_EQ( m.getElement( 4, 2 ),   0 );
-   EXPECT_EQ( m.getElement( 4, 3 ), 151 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  55 );
+   EXPECT_EQ( m.getElement( 4, 3 ),   0 );
    EXPECT_EQ( m.getElement( 4, 4 ), 155 );
 
    EXPECT_EQ( m.getElement( 5, 0 ),   0 );
    EXPECT_EQ( m.getElement( 5, 1 ),   0 );
    EXPECT_EQ( m.getElement( 5, 2 ),   0 );
-   EXPECT_EQ( m.getElement( 5, 3 ),   0 );
-   EXPECT_EQ( m.getElement( 5, 4 ), 216 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  66 );
+   EXPECT_EQ( m.getElement( 5, 4 ),   0 );
 }
 
 template< typename Matrix >
@@ -814,6 +763,7 @@ void test_VectorProduct()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
 
    /*
     * Sets up the following 5x4 matrix:
@@ -826,8 +776,9 @@ void test_VectorProduct()
     */
    const IndexType rows = 5;
    const IndexType cols = 4;
+   DiagonalsShiftsType diagonals{ -1, 0, 1 };
 
-   Matrix m( rows, cols );
+   Matrix m( rows, cols, diagonals );
 
    RealType value = 1;
    for( IndexType i = 0; i < rows; i++ )
@@ -861,6 +812,8 @@ void test_AddMatrix()
    using RealType = typename Matrix1::RealType;
    using DeviceType = typename Matrix1::DeviceType;
    using IndexType = typename Matrix1::IndexType;
+   using DiagonalsShiftsType1 = typename Matrix1::DiagonalsShiftsType;
+   using DiagonalsShiftsType2 = typename Matrix2::DiagonalsShiftsType;
 
    /*
     * Sets up the following 5x4 matrix:
@@ -873,8 +826,10 @@ void test_AddMatrix()
     */
    const IndexType rows = 5;
    const IndexType cols = 4;
+   DiagonalsShiftsType1 diagonals1;
+   DiagonalsShiftsType2 diagonals2;
 
-   Matrix1 m( rows, cols );
+   Matrix1 m( rows, cols, diagonals1 );
 
    RealType value = 1;
    for( IndexType i = 0; i < rows; i++ )
@@ -894,7 +849,7 @@ void test_AddMatrix()
     *    |  0  0  9 10 |
     *    \  0  0  0 11 /
     */
-   Matrix2 m2( rows, cols );
+   Matrix2 m2( rows, cols, diagonals2 );
 
    RealType newValue = 1;
    for( IndexType i = 0; i < rows; i++ )
@@ -1457,7 +1412,7 @@ TYPED_TEST( MatrixTest, getNonemptyRowsCountTest )
     test_GetNonemptyRowsCount< MatrixType >();
 }
 
-/*
+
 TYPED_TEST( MatrixTest, getCompressedRowLengthTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
@@ -1465,13 +1420,6 @@ TYPED_TEST( MatrixTest, getCompressedRowLengthTest )
     test_GetCompressedRowLengths< MatrixType >();
 }
 
-TYPED_TEST( MatrixTest, getRowLengthTest )
-{
-    using MatrixType = typename TestFixture::MatrixType;
-
-    test_GetRowLength< MatrixType >();
-}
-
 TYPED_TEST( MatrixTest, getAllocatedElementsCountTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
@@ -1535,7 +1483,7 @@ TYPED_TEST( MatrixTest, vectorProductTest )
     test_VectorProduct< MatrixType >();
 }
 
-TYPED_TEST( MatrixTest, addMatrixTest )
+/*TYPED_TEST( MatrixTest, addMatrixTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
 
-- 
GitLab


From 6747a5465e225f397f3935757f5d3a689be81a60 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Tue, 14 Jan 2020 21:37:50 +0100
Subject: [PATCH 092/179] Fixed multidiagonal matrix.

---
 src/TNL/Matrices/Multidiagonal.h              |   9 +-
 src/TNL/Matrices/Multidiagonal.hpp            |  60 +++++++---
 .../Matrices/MultidiagonalMatrixTest.h        | 106 +++++++-----------
 3 files changed, 94 insertions(+), 81 deletions(-)

diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h
index 1741c0c75..9e5f92295 100644
--- a/src/TNL/Matrices/Multidiagonal.h
+++ b/src/TNL/Matrices/Multidiagonal.h
@@ -179,8 +179,13 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator >
       Multidiagonal& operator=( const Multidiagonal& matrix );
 
       // cross-device copy assignment
-      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
-      Multidiagonal& operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix );
+      template< typename Real_,
+                typename Device_,
+                typename Index_,
+                bool RowMajorOrder_,
+                typename RealAllocator_,
+                typename IndexAllocator_ >
+      Multidiagonal& operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_, IndexAllocator_ >& matrix );
 
       void save( File& file ) const;
 
diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp
index 53e3c7f2f..b88511501 100644
--- a/src/TNL/Matrices/Multidiagonal.hpp
+++ b/src/TNL/Matrices/Multidiagonal.hpp
@@ -648,15 +648,17 @@ template< typename Real,
           bool RowMajorOrder,
           typename RealAllocator,
           typename IndexAllocator >
-   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_, typename IndexAllocator_ >
 Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >&
 Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
-operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix )
+operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_, IndexAllocator_ >& matrix )
 {
-   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
-                  "unknown device" );
-   static_assert( std::is_same< Device_, Devices::Host >::value || std::is_same< Device_, Devices::Cuda >::value,
-                  "unknown device" );
+   using RHSMatrix = Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_, IndexAllocator_ >;
+   using RHSIndexType = typename RHSMatrix::IndexType;
+   using RHSRealType = typename RHSMatrix::RealType;
+   using RHSDeviceType = typename RHSMatrix::DeviceType;
+   using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType;
+   using RHSIndexAllocatorType = typename RHSMatrix::IndexAllocatorType;
 
    this->setLike( matrix );
    if( RowMajorOrder == RowMajorOrder_ )
@@ -673,13 +675,45 @@ operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllo
       }
       else
       {
-         Multidiagonal< Real, Device, Index, RowMajorOrder_ > auxMatrix;
-         auxMatrix = matrix;
-         const auto matrix_view = auxMatrix.getView();
-         auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
-            value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
-         };
-         this->forAllRows( f );
+         const IndexType maxRowLength = this->diagonalsShifts.getSize();
+         const IndexType bufferRowsCount( 128 );
+         const size_t bufferSize = bufferRowsCount * maxRowLength;
+         Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize );
+         Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType, RHSIndexAllocatorType > matrixColumnsBuffer( bufferSize );
+         Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize );
+         Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize );
+         auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
+         auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView();
+         auto thisValuesBuffer_view = thisValuesBuffer.getView();
+         auto thisColumnsBuffer_view = thisColumnsBuffer.getView();
+
+         IndexType baseRow( 0 );
+         const IndexType rowsCount = this->getRows();
+         while( baseRow < rowsCount )
+         {
+            const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
+
+            ////
+            // Copy matrix elements into buffer
+            auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable {
+                  const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
+                  matrixValuesBuffer_view[ bufferIdx ] = value;
+            };
+            matrix.forRows( baseRow, lastRow, f1 );
+
+            ////
+            // Copy the source matrix buffer to this matrix buffer
+            thisValuesBuffer_view = matrixValuesBuffer_view;
+
+            ////
+            // Copy matrix elements from the buffer to the matrix
+            auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value  ) mutable {
+               const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
+                  value = thisValuesBuffer_view[ bufferIdx ];
+            };
+            this->forRows( baseRow, lastRow, f2 );
+            baseRow += bufferRowsCount;
+         }
       }
    }
 }
diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
index 514ea39e0..21a836d2d 100644
--- a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
+++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
@@ -768,15 +768,15 @@ void test_VectorProduct()
    /*
     * Sets up the following 5x4 matrix:
     *
-    *    /  1  2  0  0 \
-    *    |  5  6  7  0 |
-    *    |  0 10 11 12 |
-    *    |  0  0 15 16 |
-    *    \  0  0  0 20 /
+    *    /  1  0  3  0 \
+    *    |  0  6  0  8 |
+    *    |  9  0 11  0 |
+    *    |  0 14  0 16 |
+    *    \  0  0 19  0 /
     */
    const IndexType rows = 5;
    const IndexType cols = 4;
-   DiagonalsShiftsType diagonals{ -1, 0, 1 };
+   DiagonalsShiftsType diagonals{ -2, 0, 2 };
 
    Matrix m( rows, cols, diagonals );
 
@@ -784,7 +784,7 @@ void test_VectorProduct()
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < cols; j++)
       {
-         if( abs( i - j ) <= 1 )
+         if( diagonals.containsValue( j - i ) )
             m.setElement( i, j, value );
          value++;
       }
@@ -799,11 +799,11 @@ void test_VectorProduct()
 
    m.vectorProduct( inVector, outVector);
 
-   EXPECT_EQ( outVector.getElement( 0 ),  6 );
-   EXPECT_EQ( outVector.getElement( 1 ), 36 );
-   EXPECT_EQ( outVector.getElement( 2 ), 66 );
-   EXPECT_EQ( outVector.getElement( 3 ), 62 );
-   EXPECT_EQ( outVector.getElement( 4 ), 40 );
+   EXPECT_EQ( outVector.getElement( 0 ),  8 );
+   EXPECT_EQ( outVector.getElement( 1 ), 28 );
+   EXPECT_EQ( outVector.getElement( 2 ), 40 );
+   EXPECT_EQ( outVector.getElement( 3 ), 60 );
+   EXPECT_EQ( outVector.getElement( 4 ), 38 );
 }
 
 template< typename Matrix1, typename Matrix2 = Matrix1 >
@@ -935,6 +935,7 @@ void test_GetMatrixProduct()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
+    using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
 /*
  * Sets up the following 5x4 matrix:
  *
@@ -946,10 +947,9 @@ void test_GetMatrixProduct()
  */
     const IndexType leftRows = 5;
     const IndexType leftCols = 4;
+    DiagonalsShiftsType diagonalsShifts( { 0, 1, 2 } );
 
-    Matrix leftMatrix;
-    leftMatrix.reset();
-    leftMatrix.setDimensions( leftRows, leftCols );
+    Matrix leftMatrix( leftRows, leftCols, diagonalsShifts );
 
     RealType value = 1;
     for( IndexType i = 0; i < leftRows; i++ )
@@ -986,9 +986,7 @@ void test_GetMatrixProduct()
  *    \  0  0  0  0 /
  */
 
-    Matrix mResult;
-    mResult.reset();
-    mResult.setDimensions( leftRows, rightCols );
+    Matrix mResult( leftRows, rightCols, diagonalsShifts );
     mResult.setValue( 0 );
 
     RealType leftMatrixMultiplicator = 1;
@@ -1040,6 +1038,7 @@ void test_GetTransposition()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
+    using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
 /*
  * Sets up the following 3x2 matrix:
  *
@@ -1049,10 +1048,9 @@ void test_GetTransposition()
  */
     const IndexType rows = 3;
     const IndexType cols = 2;
+    DiagonalsShiftsType diagonalsShifts( { 0, 1, 2 } );
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+    Matrix m( rows, cols, diagonalsShifts );
 
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++ )
@@ -1067,9 +1065,7 @@ void test_GetTransposition()
  *    /  0  0  0 \
  *    \  0  0  0 /
  */
-    Matrix mTransposed;
-    mTransposed.reset();
-    mTransposed.setDimensions( cols, rows );
+    Matrix mTransposed( cols, rows, diagonalsShifts );
 
     mTransposed.print( std::cout );
 
@@ -1102,6 +1098,7 @@ void test_PerformSORIteration()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
+    using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
 /*
  * Sets up the following 4x4 matrix:
  *
@@ -1112,10 +1109,9 @@ void test_PerformSORIteration()
  */
     const IndexType rows = 4;
     const IndexType cols = 4;
+    DiagonalsShiftsType diagonalsShifts( { 0, 1, 2 } );
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+    Matrix m( rows, cols, diagonalsShifts );
 
     m.setElement( 0, 0, 4.0 );        // 0th row
     m.setElement( 0, 1, 1.0 );
@@ -1178,33 +1174,35 @@ void test_AssignmentOperator()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
    constexpr bool rowMajorOrder = Matrix::getRowMajorOrder();
 
    using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType, rowMajorOrder >;
    using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType, !rowMajorOrder >;
 
    const IndexType rows( 10 ), columns( 10 );
-   MultidiagonalHost hostMatrix( rows, columns );
+   DiagonalsShiftsType diagonalsShifts( { -4, -2, 0, 2, 3, 5 } );
+   MultidiagonalHost hostMatrix( rows, columns, diagonalsShifts );
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j <  columns; j++ )
-         if( abs( i - j ) <= 1 )
+         if( diagonalsShifts.containsValue( j - i ) )
             hostMatrix.setElement( i, j,  i + j );
 
-   Matrix matrix( rows, columns );
+   Matrix matrix( rows, columns, diagonalsShifts );
    matrix.getValues() = 0.0;
    matrix = hostMatrix;
    for( IndexType i = 0; i < columns; i++ )
       for( IndexType j = 0; j < rows; j++ )
-            if( abs( i - j ) <= 1 )
+            if( diagonalsShifts.containsValue( j - i ) )
                EXPECT_EQ( matrix.getElement( i, j ), i + j );
             else
                EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
 
 #ifdef HAVE_CUDA
-   MultidiagonalCuda cudaMatrix( rows, columns );
+   MultidiagonalCuda cudaMatrix( rows, columns, diagonalsShifts );
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < columns; j++ )
-         if( abs( i - j ) <= 1 )
+         if( diagonalsShifts.containsValue( j - i ) )
             cudaMatrix.setElement( i, j, i + j );
 
    matrix.getValues() = 0.0;
@@ -1212,7 +1210,7 @@ void test_AssignmentOperator()
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < columns; j++ )
       {
-         if( abs( i - j ) <= 1 )
+         if( diagonalsShifts.containsValue( j - i ) )
             EXPECT_EQ( matrix.getElement( i, j ), i + j );
          else
             EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
@@ -1227,6 +1225,7 @@ void test_SaveAndLoad()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
 
    /*
     * Sets up the following 4x4 matrix:
@@ -1238,14 +1237,15 @@ void test_SaveAndLoad()
     */
    const IndexType rows = 4;
    const IndexType cols = 4;
+   DiagonalsShiftsType diagonalsShifts( { -1, 0, 1 } );
 
-   Matrix savedMatrix( rows, cols );
+   Matrix savedMatrix( rows, cols, diagonalsShifts );
 
    RealType value = 1;
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < cols; j++ )
       {
-         if( abs( i - j ) <= 1 )
+         if( diagonalsShifts.containsValue( j - i ) )
             savedMatrix.setElement( i, j, value );
          value++;
       }
@@ -1303,6 +1303,7 @@ void test_Print()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
 
    /*
     * Sets up the following 5x4 sparse matrix:
@@ -1315,8 +1316,9 @@ void test_Print()
     */
    const IndexType rows = 5;
    const IndexType cols = 4;
+   DiagonalsShiftsType diagonalsShifts( { -1, 0, 1 } );
 
-   Matrix m( rows, cols );
+   Matrix m( rows, cols, diagonalsShifts );
 
    RealType value = 1;
    for( IndexType i = 0; i < rows; i++)
@@ -1488,20 +1490,7 @@ TYPED_TEST( MatrixTest, vectorProductTest )
     using MatrixType = typename TestFixture::MatrixType;
 
     test_AddMatrix< MatrixType >();
-}
-
-TYPED_TEST( MatrixTest, addMatrixTest_differentOrdering )
-{
-    using MatrixType = typename TestFixture::MatrixType;
-
-    using RealType = typename MatrixType::RealType;
-    using DeviceType = typename MatrixType::DeviceType;
-    using IndexType = typename MatrixType::IndexType;
-    using RealAllocatorType = typename MatrixType::RealAllocatorType;
-    using MatrixType2 = TNL::Matrices::Multidiagonal< RealType, DeviceType, IndexType, ! MatrixType::getRowMajorOrder(), RealAllocatorType >;
-
-    test_AddMatrix< MatrixType, MatrixType2 >();
-}
+}*/
 
 TYPED_TEST( MatrixTest, assignmentOperatorTest )
 {
@@ -1523,21 +1512,6 @@ TYPED_TEST( MatrixTest, printTest )
 
     test_Print< MatrixType >();
 }
-*/
-
-//// test_getType is not general enough yet. DO NOT TEST IT YET.
-
-//TEST( MultidiagonalMatrixTest, Multidiagonal_GetTypeTest_Host )
-//{
-//    host_test_GetType< Multidiagonal_host_float, Multidiagonal_host_int >();
-//}
-//
-//#ifdef HAVE_CUDA
-//TEST( MultidiagonalMatrixTest, Multidiagonal_GetTypeTest_Cuda )
-//{
-//    cuda_test_GetType< Multidiagonal_cuda_float, Multidiagonal_cuda_int >();
-//}
-//#endif
 
 /*TEST( MultidiagonalMatrixTest, Multidiagonal_getMatrixProductTest_Host )
 {
-- 
GitLab


From 71a1f300e71ef90e6d81f07e553c11eb1d227b94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Tue, 14 Jan 2020 22:25:05 +0100
Subject: [PATCH 093/179] Fixing multidiagonal matrix with CUDA.

---
 src/TNL/Matrices/Matrix.h                    | 10 ++++++----
 src/TNL/Matrices/Multidiagonal.hpp           |  2 +-
 src/TNL/Matrices/MultidiagonalMatrixView.h   |  8 ++++----
 src/TNL/Matrices/MultidiagonalMatrixView.hpp |  2 +-
 4 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index ebe7ccc21..0b34a5a57 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -76,14 +76,16 @@ public:
    __cuda_callable__
    IndexType getColumns() const;
 
-   virtual void setElement( const IndexType row,
+   //virtual TODO: uncomment
+   void setElement( const IndexType row,
                             const IndexType column,
-                            const RealType& value ) = 0;
+                            const RealType& value );// = 0;
 
-   virtual void addElement( const IndexType row,
+   //virtual TODO: uncomment
+   void addElement( const IndexType row,
                             const IndexType column,
                             const RealType& value,
-                            const RealType& thisElementMultiplicator = 1.0 ) = 0;
+                            const RealType& thisElementMultiplicator = 1.0 );// = 0;
 
    virtual Real getElement( const IndexType row,
                             const IndexType column ) const = 0;
diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp
index b88511501..7bc83f2d4 100644
--- a/src/TNL/Matrices/Multidiagonal.hpp
+++ b/src/TNL/Matrices/Multidiagonal.hpp
@@ -707,7 +707,7 @@ operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllo
 
             ////
             // Copy matrix elements from the buffer to the matrix
-            auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value  ) mutable {
+            auto f2 = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType localIdx, const IndexType columnIndex, RealType& value  ) mutable {
                const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
                   value = thisValuesBuffer_view[ bufferIdx ];
             };
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.h b/src/TNL/Matrices/MultidiagonalMatrixView.h
index 3d33ac0ae..1e5a9bd28 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixView.h
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.h
@@ -30,10 +30,10 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
       using DeviceType = Device;
       using IndexType = Index;
       using BaseType = MatrixView< Real, Device, Index >;
-      using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType >;
-      using DiagonalsShiftsView = typename DiagonalsShiftsType::ViewType;
-      using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >;
-      using HostDiagonalsShiftsView = typename DiagonalsShiftsType::ViewType;
+      //using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType >;
+      using DiagonalsShiftsView = Containers::VectorView< IndexType, DeviceType, IndexType >;
+      //using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >;
+      using HostDiagonalsShiftsView = Containers::VectorView< IndexType, Devices::Host, IndexType >;
       using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, RowMajorOrder >;
       using ValuesViewType = typename BaseType::ValuesView;
       using ViewType = MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >;
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
index 1ba8dc34d..2839c997a 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
@@ -398,7 +398,7 @@ forRows( IndexType first, IndexType last, Function& function ) const
       {
          const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ];
          if( columnIdx >= 0 && columnIdx < columns )
-            function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx, 0 ) ] );
+            function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ] );
       }
    };
    Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
-- 
GitLab


From 537628295e5e7127f5d202e3f73a61103acaad15 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 15 Jan 2020 14:42:18 +0100
Subject: [PATCH 094/179] Avoiding Matrix::getCompressedRowLengths.

---
 src/Python/pytnl/tnl/SparseMatrix.h |  2 +-
 src/TNL/Matrices/Matrix.h           |  2 +-
 src/TNL/Matrices/Matrix.hpp         | 10 +++++-----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/Python/pytnl/tnl/SparseMatrix.h b/src/Python/pytnl/tnl/SparseMatrix.h
index b4cc0fc1a..e4064e1a4 100644
--- a/src/Python/pytnl/tnl/SparseMatrix.h
+++ b/src/Python/pytnl/tnl/SparseMatrix.h
@@ -51,7 +51,7 @@ void export_Matrix( py::module & m, const char* name )
 
     using VectorType = TNL::Containers::Vector< typename Matrix::RealType, typename Matrix::DeviceType, typename Matrix::IndexType >;
 
-    void (Matrix::* _getCompressedRowLengths)(typename Matrix::CompressedRowLengthsVector&) const = &Matrix::getCompressedRowLengths;
+    void (Matrix::* _getCompressedRowLengths)(typename Matrix::CompressedRowLengthsVectorView) const = &Matrix::getCompressedRowLengths;
 
     auto matrix = py::class_< Matrix, TNL::Object >( m, name )
         .def(py::init<>())
diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index 0b34a5a57..a5f2b6b8f 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -57,7 +57,7 @@ public:
 
    // TODO: implementation is not parallel
    // TODO: it would be nice if padding zeros could be stripped
-   void getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const;
+   //void getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const;
 
    virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
 
diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp
index 4ddbacde5..0710ca829 100644
--- a/src/TNL/Matrices/Matrix.hpp
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -56,7 +56,7 @@ void Matrix< Real, Device, Index, RealAllocator >::setDimensions( const IndexTyp
    this->columns = columns;
 }
 
-template< typename Real,
+/*template< typename Real,
           typename Device,
           typename Index,
           typename RealAllocator >
@@ -64,7 +64,7 @@ void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( Comp
 {
    rowLengths.setSize( this->getRows() );
    getCompressedRowLengths( rowLengths.getView() );
-}
+}*/
 
 template< typename Real,
           typename Device,
@@ -72,9 +72,9 @@ template< typename Real,
           typename RealAllocator >
 void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
 {
-   //TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
-   //for( IndexType row = 0; row < this->getRows(); row++ )
-   //   rowLengths.setElement( row, this->getRowLength( row ) );
+   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
+   for( IndexType row = 0; row < this->getRows(); row++ )
+      rowLengths.setElement( row, this->getRowLength( row ) );
 }
 
 template< typename Real,
-- 
GitLab


From 376b4bffe77bdbc005f2692b66de09f107d47d16 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 15 Jan 2020 14:42:57 +0100
Subject: [PATCH 095/179] Fixing multidiagonal matrix unit tests fro CUDA.

---
 .../Matrices/MultidiagonalMatrixTest.h        | 54 +++++--------------
 1 file changed, 13 insertions(+), 41 deletions(-)

diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
index 21a836d2d..49bcfa11c 100644
--- a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
+++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
@@ -159,7 +159,7 @@ void test_GetCompressedRowLengths()
     *    |  0  0  0  0  1  0  1  0 |  -> 2
     *    \  0  0  0  0  0  1  0  0 /  -> 1
     */
-   
+
    const IndexType rows = 8;
    const IndexType cols = 8;
 
@@ -354,22 +354,12 @@ void test_SetElement()
    RealType value = 1;
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < cols; j++ )
-      {
-         bool found( false );
-         for( IndexType k = 0; k < diagonals.getSize(); k++ )
-         {
-            if( i + diagonals[ k ] == j )
-            {
-               m.setElement( i, j, value++ );
-               found = true;
-               break;
-            }
-         }
-         if( ! found )
+         if( diagonals.containsValue( j - i ) )
+            m.setElement( i, j, value++ );
+         else
          {
             EXPECT_THROW( m.setElement( i, j, value++ ), std::logic_error );
          }
-      }
 
    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
@@ -427,24 +417,16 @@ void test_AddElement()
    RealType value = 1;
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < cols; j++ )
-      {
-         bool found( false );
-         for( IndexType k = 0; k < diagonals.getSize(); k++ )
+         if( diagonals.containsValue( j - i ) )
          {
-            if( i + diagonals[ k ] == j )
-            {
-               if( j >= i )
-                  m.setElement( i, j, value++ );
-               else value++;
-               found = true;
-               break;
-            }
+            if( j >= i )
+               m.setElement( i, j, value );
+            value++;
          }
-         if( ! found )
+         else
          {
             EXPECT_THROW( m.setElement( i, j, value++ ), std::logic_error );
          }
-      }
 
    // Check the added elements
    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
@@ -486,29 +468,19 @@ void test_AddElement()
     * 2 * |  0  0 13 14  0 | + |  0  0 13 14  0 | =  |  0  0 39 42  0 |
     *     |  0  0  0 19 20 |   | 16  0  0 19 20 |    | 16  0  0 57 60 |
     *     \  0  0  0  0 25 /   \  0 22  0  0 25 /    \  0 22  0  0 75 /
-    *     
+    *
     */
 
    value = 1;
    RealType multiplicator = 2;
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < cols; j++ )
-      {
-         bool found( false );
-         for( IndexType k = 0; k < diagonals.getSize(); k++ )
-         {
-            if( i + diagonals[ k ] == j )
-            {
-               m.addElement( i, j, value++, multiplicator );
-               found = true;
-               break;
-            }
-         }
-         if( ! found )
+         if( diagonals.containsValue( j - i ) )
+            m.addElement( i, j, value++, multiplicator );
+         else
          {
             EXPECT_THROW( m.addElement( i, j, value++, multiplicator ), std::logic_error );
          }
-      }
 
    EXPECT_EQ( m.getElement( 0, 0 ),  3 );
    EXPECT_EQ( m.getElement( 0, 1 ),  6 );
-- 
GitLab


From 4310648979b9c849d893287cb4deda1d03c359c0 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 15 Jan 2020 14:45:12 +0100
Subject: [PATCH 096/179] Changing Matrix::[set,add]Element from bool to void
 with expcetion throwing.

---
 src/TNL/Matrices/Dense.h                   |  4 ++--
 src/TNL/Matrices/Dense.hpp                 | 20 +++++++++++---------
 src/TNL/Matrices/DenseMatrixView.h         |  4 ++--
 src/TNL/Matrices/DenseMatrixView.hpp       | 20 +++++++++++---------
 src/TNL/Matrices/Matrix.hpp                |  6 +++---
 src/TNL/Matrices/SparseMatrixView.h        |  4 ++--
 src/TNL/Matrices/SparseMatrixView.hpp      | 20 ++++++++++++--------
 src/TNL/Matrices/Tridiagonal.hpp           |  4 ++--
 src/TNL/Matrices/TridiagonalMatrixView.hpp |  2 --
 9 files changed, 45 insertions(+), 39 deletions(-)

diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index 778fd0bd4..485c94701 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -107,11 +107,11 @@ class Dense : public Matrix< Real, Device, Index >
       const Real& operator()( const IndexType row,
                               const IndexType column ) const;
 
-      bool setElement( const IndexType row,
+      void setElement( const IndexType row,
                        const IndexType column,
                        const RealType& value );
 
-      bool addElement( const IndexType row,
+      void addElement( const IndexType row,
                        const IndexType column,
                        const RealType& value,
                        const RealType& thisElementMultiplicator = 1.0 );
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index c4deeb6fa..7e6f37948 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -289,12 +289,13 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setElement( const IndexType row,
-                                               const IndexType column,
-                                               const RealType& value )
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setElement( const IndexType row,
+            const IndexType column,
+            const RealType& value )
 {
    this->values.setElement( this->getElementIndex( row, column ), value );
-   return true;
 }
 
 template< typename Real,
@@ -302,10 +303,12 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addElement( const IndexType row,
-                                                        const IndexType column,
-                                                        const RealType& value,
-                                                        const RealType& thisElementMultiplicator )
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+addElement( const IndexType row,
+            const IndexType column,
+            const RealType& value,
+            const RealType& thisElementMultiplicator )
 {
    const IndexType elementIndex = this->getElementIndex( row, column );
    if( thisElementMultiplicator == 1.0 )
@@ -314,7 +317,6 @@ bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addElement( con
    else
       this->values.setElement( elementIndex,
                                thisElementMultiplicator * this->values.getElement( elementIndex ) + value );
-   return true;
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h
index 23f5d7317..9bad424b2 100644
--- a/src/TNL/Matrices/DenseMatrixView.h
+++ b/src/TNL/Matrices/DenseMatrixView.h
@@ -111,11 +111,11 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
       const Real& operator()( const IndexType row,
                               const IndexType column ) const;
 
-      bool setElement( const IndexType row,
+      void setElement( const IndexType row,
                        const IndexType column,
                        const RealType& value );
 
-      bool addElement( const IndexType row,
+      void addElement( const IndexType row,
                        const IndexType column,
                        const RealType& value,
                        const RealType& thisElementMultiplicator = 1.0 );
diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp
index 48c0ccdc3..21f6d79ef 100644
--- a/src/TNL/Matrices/DenseMatrixView.hpp
+++ b/src/TNL/Matrices/DenseMatrixView.hpp
@@ -240,22 +240,25 @@ template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder >
-bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::setElement( const IndexType row,
-                                               const IndexType column,
-                                               const RealType& value )
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+setElement( const IndexType row,
+            const IndexType column,
+            const RealType& value )
 {
    this->values.setElement( this->getElementIndex( row, column ), value );
-   return true;
 }
 
 template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder >
-bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::addElement( const IndexType row,
-                                                        const IndexType column,
-                                                        const RealType& value,
-                                                        const RealType& thisElementMultiplicator )
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+addElement( const IndexType row,
+            const IndexType column,
+            const RealType& value,
+            const RealType& thisElementMultiplicator )
 {
    const IndexType elementIndex = this->getElementIndex( row, column );
    if( thisElementMultiplicator == 1.0 )
@@ -264,7 +267,6 @@ bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::addElement( const In
    else
       this->values.setElement( elementIndex,
                                thisElementMultiplicator * this->values.getElement( elementIndex ) + value );
-   return true;
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp
index 0710ca829..2d5906d23 100644
--- a/src/TNL/Matrices/Matrix.hpp
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -139,12 +139,12 @@ getValues() const
 {
    return this->values;
 }
-   
+
 template< typename Real,
           typename Device,
           typename Index,
           typename RealAllocator >
-typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector& 
+typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector&
 Matrix< Real, Device, Index, RealAllocator >::
 getValues()
 {
@@ -237,7 +237,7 @@ template< typename Real,
           typename Device,
           typename Index,
           typename RealAllocator >
-void 
+void
 Matrix< Real, Device, Index, RealAllocator >::
 computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector)
 {
diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h
index 1f587acf3..aba3b4642 100644
--- a/src/TNL/Matrices/SparseMatrixView.h
+++ b/src/TNL/Matrices/SparseMatrixView.h
@@ -92,11 +92,11 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
       __cuda_callable__
       RowView getRow( const IndexType& rowIdx );
 
-      bool setElement( const IndexType row,
+      void setElement( const IndexType row,
                        const IndexType column,
                        const RealType& value );
 
-      bool addElement( const IndexType row,
+      void addElement( const IndexType row,
                        const IndexType column,
                        const RealType& value,
                        const RealType& thisElementMultiplicator = 1.0 );
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index d836fe5e9..3b192b4e9 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -54,7 +54,7 @@ auto
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
 getView() -> ViewType
 {
-   return ViewType( this->getRows(), 
+   return ViewType( this->getRows(),
                     this->getColumns(),
                     this->getValues().getView(),
                     this->columnIndexes.getView(),
@@ -204,13 +204,13 @@ template< typename Real,
           typename Index,
           typename MatrixType,
           template< typename, typename > class SegmentsView >
-bool
+void
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
 setElement( const IndexType row,
             const IndexType column,
             const RealType& value )
 {
-   return this->addElement( row, column, value, 0.0 );
+   this->addElement( row, column, value, 0.0 );
 }
 
 template< typename Real,
@@ -218,7 +218,7 @@ template< typename Real,
           typename Index,
           typename MatrixType,
           template< typename, typename > class SegmentsView >
-bool
+void
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
 addElement( const IndexType row,
             const IndexType column,
@@ -244,18 +244,22 @@ addElement( const IndexType row,
       if( col == column )
       {
          this->values.setElement( globalIdx, thisElementMultiplicator * this->values.getElement( globalIdx ) + value );
-         return true;
+         return;
       }
       if( col == this->getPaddingIndex() || col > column )
          break;
    }
    if( i == rowSize )
-      return false;
+   {
+      std::stringstream msg;
+      msg << "The capacity of the sparse matrix row number "  << row << " was exceeded.";
+      throw std::logic_error( msg.str() );
+   }
    if( col == this->getPaddingIndex() )
    {
       this->columnIndexes.setElement( globalIdx, column );
       this->values.setElement( globalIdx, value );
-      return true;
+      return;
    }
    else
    {
@@ -273,7 +277,7 @@ addElement( const IndexType row,
 
       this->columnIndexes.setElement( globalIdx, column );
       this->values.setElement( globalIdx, value );
-      return true;
+      return;
    }
 }
 
diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp
index 41d722c6a..8f4f4e190 100644
--- a/src/TNL/Matrices/Tridiagonal.hpp
+++ b/src/TNL/Matrices/Tridiagonal.hpp
@@ -404,7 +404,7 @@ template< typename Real,
           typename RealAllocator >
 template< typename Vector >
 __cuda_callable__
-typename Vector::RealType 
+typename Vector::RealType
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 rowVectorProduct( const IndexType row, const Vector& vector ) const
 {
@@ -418,7 +418,7 @@ template< typename Real,
           typename RealAllocator >
    template< typename InVector,
              typename OutVector >
-void 
+void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 vectorProduct( const InVector& inVector, OutVector& outVector ) const
 {
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp
index e851d2a1f..008becb09 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp
@@ -228,7 +228,6 @@ setElement( const IndexType row, const IndexType column, const RealType& value )
       throw std::logic_error( msg.str() );
    }
    this->values.setElement( this->getElementIndex( row, column ), value );
-   return true;
 }
 
 template< typename Real,
@@ -254,7 +253,6 @@ addElement( const IndexType row,
    }
    const Index i = this->getElementIndex( row, column );
    this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value );
-   return true;
 }
 
 template< typename Real,
-- 
GitLab


From cdc03609b8f270645060bbb08db97cac64b10beb Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 15 Jan 2020 14:45:58 +0100
Subject: [PATCH 097/179] Renaming nonEmptyRows to nonemptyRows in tridiagonal
 matrix indexer.

---
 src/TNL/Matrices/TridiagonalMatrixView.hpp     |  4 ++--
 .../details/TridiagonalMatrixIndexer.h         | 18 +++++++++---------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp
index 008becb09..d8fa6061c 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp
@@ -326,7 +326,7 @@ void
 TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   this->rowsReduction( 0, this->indexer.getNonEmptyRowsCount(), fetch, reduce, keep, zero );
+   this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -418,7 +418,7 @@ void
 TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 forAllRows( Function& function )
 {
-   this->forRows( 0, this->indexer.getNonEmptyRowsCount(), function );
+   this->forRows( 0, this->indexer.getNonemptyRowsCount(), function );
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
index 6d3377b4f..64beb44f7 100644
--- a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
+++ b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
@@ -26,21 +26,21 @@ class TridiagonalMatrixIndexer
 
       __cuda_callable__
       TridiagonalMatrixIndexer()
-      : rows( 0 ), columns( 0 ), nonEmptyRows( 0 ){};
+      : rows( 0 ), columns( 0 ), nonemptyRows( 0 ){};
 
       __cuda_callable__
       TridiagonalMatrixIndexer( const IndexType& rows, const IndexType& columns )
-      : rows( rows ), columns( columns ), nonEmptyRows( TNL::min( rows, columns ) + ( rows > columns ) ) {};
+      : rows( rows ), columns( columns ), nonemptyRows( TNL::min( rows, columns ) + ( rows > columns ) ) {};
 
       __cuda_callable__
       TridiagonalMatrixIndexer( const TridiagonalMatrixIndexer& indexer )
-      : rows( indexer.rows ), columns( indexer.columns ), nonEmptyRows( indexer.nonEmptyRows ) {};
+      : rows( indexer.rows ), columns( indexer.columns ), nonemptyRows( indexer.nonemptyRows ) {};
 
       void setDimensions( const IndexType& rows, const IndexType& columns )
       {
          this->rows = rows;
          this->columns = columns;
-         this->nonEmptyRows = min( rows, columns ) + ( rows > columns );
+         this->nonemptyRows = min( rows, columns ) + ( rows > columns );
       };
 
       __cuda_callable__
@@ -65,9 +65,9 @@ class TridiagonalMatrixIndexer
       const IndexType& getColumns() const { return this->columns; };
 
       __cuda_callable__
-      const IndexType& getNonEmptyRowsCount() const { return this->nonEmptyRows; };
+      const IndexType& getNonemptyRowsCount() const { return this->nonemptyRows; };
       __cuda_callable__
-      IndexType getStorageSize() const { return 3 * this->nonEmptyRows; };
+      IndexType getStorageSize() const { return 3 * this->nonemptyRows; };
 
       __cuda_callable__
       IndexType getGlobalIndex( const Index rowIdx, const Index localIdx ) const
@@ -76,16 +76,16 @@ class TridiagonalMatrixIndexer
          TNL_ASSERT_LT( localIdx, 3, "" );
          TNL_ASSERT_GE( rowIdx, 0, "" );
          TNL_ASSERT_LT( rowIdx, this->rows, "" );
-         
+
          if( RowMajorOrder )
             return 3 * rowIdx + localIdx;
          else
-            return localIdx * nonEmptyRows + rowIdx;
+            return localIdx * nonemptyRows + rowIdx;
       };
 
       protected:
 
-         IndexType rows, columns, nonEmptyRows;
+         IndexType rows, columns, nonemptyRows;
 };
       } //namespace details
    } // namespace Materices
-- 
GitLab


From fcb368b75abc9fbce5a0e4cf8e31c18935fc1fdb Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 15 Jan 2020 18:05:53 +0100
Subject: [PATCH 098/179] Forwarding dense matrix methods calls to
 DenseMatrixView.

---
 src/TNL/Containers/Segments/EllpackView.h   |   2 +
 src/TNL/Containers/Segments/EllpackView.hpp |  13 ++
 src/TNL/Matrices/Dense.h                    |   4 +-
 src/TNL/Matrices/Dense.hpp                  | 136 +++++---------------
 src/TNL/Matrices/DenseMatrixView.h          |  10 --
 src/TNL/Matrices/DenseMatrixView.hpp        |  77 ++++-------
 src/UnitTests/Matrices/DenseMatrixTest.h    |   4 +-
 7 files changed, 71 insertions(+), 175 deletions(-)

diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h
index 737810498..f64b04068 100644
--- a/src/TNL/Containers/Segments/EllpackView.h
+++ b/src/TNL/Containers/Segments/EllpackView.h
@@ -104,6 +104,8 @@ class EllpackView
       template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
       void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
 
+      EllpackView& operator=( const EllpackView& view );
+
       void save( File& file ) const;
 
       void load( File& file );
diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp
index 21be88654..719a349a9 100644
--- a/src/TNL/Containers/Segments/EllpackView.hpp
+++ b/src/TNL/Containers/Segments/EllpackView.hpp
@@ -291,6 +291,19 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re
    this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
 }
 
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+EllpackView< Device, Index, RowMajorOrder, Alignment >&
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+operator=( const EllpackView< Device, Index, RowMajorOrder, Alignment >& view )
+{
+   this->segmentSize = view.segmentSize;
+   this->size = view.size;
+   this->alignedSize = view.alignedSize;
+}
+
 template< typename Device,
           typename Index,
           bool RowMajorOrder,
diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index 485c94701..2e71316e9 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -168,7 +168,7 @@ class Dense : public Matrix< Real, Device, Index >
                                 const RealType& omega = 1.0 ) const;
 
       // copy assignment
-      //Dense& operator=( const Dense& matrix );
+      Dense& operator=( const Dense& matrix );
 
       // cross-device copy assignment
       template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAlocator_ >
@@ -200,6 +200,8 @@ class Dense : public Matrix< Real, Device, Index >
       friend class DenseDeviceDependentCode< DeviceType >;
 
       SegmentsType segments;
+
+      ViewType view;
 };
 
 } // namespace Matrices
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 7e6f37948..b8fa96907 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -76,10 +76,7 @@ String
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getSerializationType()
 {
-   return String( "Matrices::Dense< " ) +
-          TNL::getSerializationType< RealType >() + ", [any_device], " +
-          TNL::getSerializationType< IndexType >() + ", " +
-          ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >";
+   return ViewType::getSerializationType();
 }
 
 template< typename Real,
@@ -108,6 +105,7 @@ setDimensions( const IndexType rows,
    this->segments.setSegmentsSizes( rows, columns );
    this->values.setSize( rows * columns );
    this->values = 0.0;
+   this->view = this->getView();
 }
 
 template< typename Real,
@@ -145,19 +143,7 @@ void
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getCompressedRowLengths( Vector& rowLengths ) const
 {
-   rowLengths.setSize( this->getRows() );
-   rowLengths = 0;
-   auto rowLengths_view = rowLengths.getView();
-   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
-      return ( value != 0.0 );
-   };
-   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
-      aux += a;
-   };
-   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
-      rowLengths_view[ rowIdx ] = value;
-   };
-   this->allRowsReduction( fetch, reduce, keep, 0 );
+   this->view.getCompressedRowLengths( rowLengths );
 }
 
 template< typename Real,
@@ -197,11 +183,7 @@ template< typename Real,
           typename RealAllocator >
 Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getNumberOfNonzeroMatrixElements() const
 {
-   const auto values_view = this->values.getConstView();
-   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
-      return ( values_view[ i ] != 0.0 );
-   };
-   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );
+   return this->view.getNumberOfNonzeroMatrixElements();
 }
 
 template< typename Real,
@@ -221,7 +203,7 @@ template< typename Real,
           typename RealAllocator >
 void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setValue( const Real& value )
 {
-   this->values = value;
+   this->view.setValue( value );
 }
 
 template< typename Real,
@@ -233,8 +215,7 @@ __cuda_callable__ auto
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getRow( const IndexType& rowIdx ) const -> const RowView
 {
-   TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." );
-   return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView() );
+   return this->view.getRow( rowIdx );
 }
 
 template< typename Real,
@@ -246,8 +227,7 @@ __cuda_callable__ auto
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getRow( const IndexType& rowIdx ) -> RowView
 {
-   TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." );
-   return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView() );
+   return this->view.getRow( rowIdx );
 }
 
 template< typename Real,
@@ -259,12 +239,7 @@ __cuda_callable__
 Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator()( const IndexType row,
                                                 const IndexType column )
 {
-   TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
-   TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." );
-   TNL_ASSERT_GE( column, 0, "Column index must be non-negative." );
-   TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." );
-
-   return this->values.operator[]( this->getElementIndex( row, column ) );
+   return this->view.operator()( row, column );
 }
 
 template< typename Real,
@@ -276,12 +251,7 @@ __cuda_callable__
 const Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator()( const IndexType row,
                                                       const IndexType column ) const
 {
-   TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
-   TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." );
-   TNL_ASSERT_GE( column, 0, "Column index must be non-negative." );
-   TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." );
-
-   return this->values.operator[]( this->getElementIndex( row, column ) );
+   return this->view.operator()( row, column );
 }
 
 template< typename Real,
@@ -295,7 +265,7 @@ setElement( const IndexType row,
             const IndexType column,
             const RealType& value )
 {
-   this->values.setElement( this->getElementIndex( row, column ), value );
+   this->view.setElement( row, column, value );
 }
 
 template< typename Real,
@@ -310,13 +280,7 @@ addElement( const IndexType row,
             const RealType& value,
             const RealType& thisElementMultiplicator )
 {
-   const IndexType elementIndex = this->getElementIndex( row, column );
-   if( thisElementMultiplicator == 1.0 )
-      this->values.setElement( elementIndex,
-                               this->values.getElement( elementIndex ) + value );
-   else
-      this->values.setElement( elementIndex,
-                               thisElementMultiplicator * this->values.getElement( elementIndex ) + value );
+   this->view.addElement( row, column, value, thisElementMultiplicator );
 }
 
 template< typename Real,
@@ -329,7 +293,7 @@ Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getElement( const IndexType row,
             const IndexType column ) const
 {
-   return this->values.getElement( this->getElementIndex( row, column ) );
+   return this->view.getElement( row, column );
 }
 
 template< typename Real,
@@ -342,12 +306,7 @@ void
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const
 {
-   const auto values_view = this->values.getConstView();
-   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
-         return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
-      return zero;
-   };
-   this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );
+   this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -373,12 +332,7 @@ void
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 forRows( IndexType first, IndexType last, Function& function ) const
 {
-   const auto values_view = this->values.getConstView();
-   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool {
-      function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
-      return true;
-   };
-   this->segments.forSegments( first, last, f );
+   this->view.forRows( first, last, function );
 }
 
 template< typename Real,
@@ -391,12 +345,7 @@ void
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 forRows( IndexType first, IndexType last, Function& function )
 {
-   auto values_view = this->values.getView();
-   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool {
-      function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
-      return true;
-   };
-   this->segments.forSegments( first, last, f );
+   this->view.forRows( first, last, function );
 }
 
 template< typename Real,
@@ -435,11 +384,7 @@ __cuda_callable__
 typename Vector::RealType Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::rowVectorProduct( const IndexType row,
                                                                                    const Vector& vector ) const
 {
-   RealType sum( 0.0 );
-   // TODO: Fix this
-   //for( IndexType column = 0; column < this->getColumns(); column++ )
-   //   sum += this->getElementFast( row, column ) * vector[ column ];
-   return sum;
+   return this->view.rowVectorProduct( row, vector );
 }
 
 template< typename Real,
@@ -453,27 +398,7 @@ void
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 vectorProduct( const InVector& inVector, OutVector& outVector ) const
 {
-   TNL_ASSERT( this->getColumns() == inVector.getSize(),
-            std::cerr << "Matrix columns: " << this->getColumns() << std::endl
-                 << "Vector size: " << inVector.getSize() << std::endl );
-   TNL_ASSERT( this->getRows() == outVector.getSize(),
-               std::cerr << "Matrix rows: " << this->getRows() << std::endl
-                    << "Vector size: " << outVector.getSize() << std::endl );
-
-   //DeviceDependentCode::vectorProduct( *this, inVector, outVector );
-   const auto inVectorView = inVector.getConstView();
-   auto outVectorView = outVector.getView();
-   const auto valuesView = this->values.getConstView();
-   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType {
-      return valuesView[ offset ] * inVectorView[ column ];
-   };
-   auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
-      sum += value;
-   };
-   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
-      outVectorView[ row ] = value;
-   };
-   this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
+   this->view.vectorProduct( inVector, outVector );
 }
 
 template< typename Real,
@@ -959,7 +884,7 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORItera
    x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum );
 }
 
-/*template< typename Real,
+template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder,
@@ -968,7 +893,9 @@ Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix )
 {
-   const IndexType bufferRowsCount( 128 );
+   setLike( matrix );
+   this->values = matrix.values;
+   /*const IndexType bufferRowsCount( 128 );
    const IndexType columns = this->getColumns();
    const size_t bufferSize = bufferRowsCount * columns;
    Containers::Vector< RealType, Device, IndexType, RealAllocatorType > sourceValuesBuffer( bufferSize );
@@ -1000,8 +927,8 @@ operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& mat
       this->forRows( baseRow, lastRow, f2 );
       baseRow += bufferRowsCount;
    }
-   return *this;
-}*/
+   return *this;*/
+}
 
 template< typename Real,
           typename Device,
@@ -1101,7 +1028,7 @@ template< typename Real,
           typename RealAllocator >
 void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::save( const String& fileName ) const
 {
-   Object::save( fileName );
+   this->view.save( fileName );
 }
 
 template< typename Real,
@@ -1121,7 +1048,7 @@ template< typename Real,
           typename RealAllocator >
 void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::save( File& file ) const
 {
-   Matrix< Real, Device, Index >::save( file );
+   this->view.save( file );
 }
 
 template< typename Real,
@@ -1141,13 +1068,7 @@ template< typename Real,
           typename RealAllocator >
 void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::print( std::ostream& str ) const
 {
-   for( IndexType row = 0; row < this->getRows(); row++ )
-   {
-      str <<"Row: " << row << " -> ";
-      for( IndexType column = 0; column < this->getColumns(); column++ )
-         str << " Col:" << column << "->" << this->getElement( row, column ) << "\t";
-      str << std::endl;
-   }
+   this->view.print( str );
 }
 
 template< typename Real,
@@ -1156,8 +1077,9 @@ template< typename Real,
           bool RowMajorOrder,
           typename RealAllocator >
 __cuda_callable__
-Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementIndex( const IndexType row,
-                                                              const IndexType column ) const
+Index
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getElementIndex( const IndexType row, const IndexType column ) const
 {
    return this->segments.getGlobalIndex( row, column );
 }
diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h
index 9bad424b2..d963dd7c3 100644
--- a/src/TNL/Matrices/DenseMatrixView.h
+++ b/src/TNL/Matrices/DenseMatrixView.h
@@ -171,22 +171,12 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
                                 Vector2& x,
                                 const RealType& omega = 1.0 ) const;
 
-      // copy assignment
       DenseMatrixView& operator=( const DenseMatrixView& matrix );
 
-      // cross-device copy assignment
-      template< typename Real2, typename Device2, typename Index2,
-                typename = typename Enabler< Device2 >::type >
-      DenseMatrixView& operator=( const DenseMatrixView< Real2, Device2, Index2 >& matrix );
-
       void save( const String& fileName ) const;
 
-      void load( const String& fileName );
-
       void save( File& file ) const;
 
-      void load( File& file );
-
       void print( std::ostream& str ) const;
 
    protected:
diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp
index 21f6d79ef..527915d55 100644
--- a/src/TNL/Matrices/DenseMatrixView.hpp
+++ b/src/TNL/Matrices/DenseMatrixView.hpp
@@ -83,7 +83,7 @@ getSerializationType()
 {
    return String( "Matrices::Dense< " ) +
           TNL::getSerializationType< RealType >() + ", [any_device], " +
-          TNL::getSerializationType< IndexType >() +
+          TNL::getSerializationType< IndexType >() + ", " +
           ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >";
 }
 
@@ -321,7 +321,7 @@ forRows( IndexType first, IndexType last, Function& function ) const
 {
    const auto values_view = this->values.getConstView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool {
-      function( rowIdx, columnIdx, values_view[ globalIdx ] );
+      function( rowIdx, columnIdx, columnIdx, values_view[ globalIdx ] );
       return true;
    };
    this->segments.forSegments( first, last, f );
@@ -339,7 +339,7 @@ forRows( IndexType first, IndexType last, Function& function )
 {
    auto values_view = this->values.getView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool {
-      function( rowIdx, columnIdx, values_view[ globalIdx ] );
+      function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
       return true;
    };
    this->segments.forSegments( first, last, f );
@@ -395,14 +395,22 @@ template< typename Real,
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::vectorProduct( const InVector& inVector,
                                                            OutVector& outVector ) const
 {
-   TNL_ASSERT( this->getColumns() == inVector.getSize(),
-            std::cerr << "Matrix columns: " << this->getColumns() << std::endl
-                 << "Vector size: " << inVector.getSize() << std::endl );
-   TNL_ASSERT( this->getRows() == outVector.getSize(),
-               std::cerr << "Matrix rows: " << this->getRows() << std::endl
-                    << "Vector size: " << outVector.getSize() << std::endl );
-
-   //DeviceDependentCode::vectorProduct( *this, inVector, outVector );
+   TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns count differs with input vector size." );
+   TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows count differs with output vector size." );
+
+   const auto inVectorView = inVector.getConstView();
+   auto outVectorView = outVector.getView();
+   const auto valuesView = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType {
+      return valuesView[ offset ] * inVectorView[ column ];
+   };
+   auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+      sum += value;
+   };
+   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      outVectorView[ row ] = value;
+   };
+   this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
 }
 
 template< typename Real,
@@ -885,39 +893,18 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::performSORIteration(
 }
 
 
-// copy assignment
 template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder >
 DenseMatrixView< Real, Device, Index, RowMajorOrder >&
-DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const DenseMatrixView& matrix )
-{
-   this->setLike( matrix );
-   this->values = matrix.values;
-   return *this;
-}
-
-// cross-device copy assignment
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder >
-   template< typename Real2, typename Device2, typename Index2, typename >
-DenseMatrixView< Real, Device, Index, RowMajorOrder >&
-DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const DenseMatrixView< Real2, Device2, Index2 >& matrix )
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+operator=( const DenseMatrixView& matrix )
 {
-   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
-                  "unknown device" );
-   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
-                  "unknown device" );
-
-   this->setLike( matrix );
-
-   throw Exceptions::NotImplementedError("Cross-device assignment for the Dense format is not implemented yet.");
+   MatrixView< Real, Device, Index >::operator=( matrix );
+   this->segments = matrix.segments;
 }
 
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -927,15 +914,6 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( const String&
    Object::save( fileName );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder >
-void DenseMatrixView< Real, Device, Index, RowMajorOrder >::load( const String& fileName )
-{
-   Object::load( fileName );
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -945,15 +923,6 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) c
    MatrixView< Real, Device, Index >::save( file );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder >
-void DenseMatrixView< Real, Device, Index, RowMajorOrder >::load( File& file )
-{
-   MatrixView< Real, Device, Index >::load( file );
-}
-
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index 0f7158010..a3e7e8f61 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -1234,9 +1234,7 @@ void test_SaveAndLoad()
 
     ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) );
 
-    Matrix loadedMatrix;
-    loadedMatrix.reset();
-    loadedMatrix.setDimensions( rows, cols );
+    Matrix loadedMatrix( rows, cols );
 
     ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) );
 
-- 
GitLab


From 990b916aad57fafe6998c60df19b2285b97b83db Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Fri, 17 Jan 2020 14:53:56 +0100
Subject: [PATCH 099/179] Forwarding SparseMatrix calls to SparseMatrixView.

---
 src/TNL/Containers/Segments/CSRView.h         |   3 +
 src/TNL/Containers/Segments/CSRView.hpp       |  15 +-
 src/TNL/Containers/Segments/EllpackView.hpp   |  12 +-
 .../Containers/Segments/SlicedEllpackView.h   |   2 +
 .../Containers/Segments/SlicedEllpackView.hpp |  27 ++-
 src/TNL/Matrices/Dense.hpp                    |   6 +-
 src/TNL/Matrices/DenseMatrixView.hpp          |  10 +-
 src/TNL/Matrices/SparseMatrix.h               |  16 +-
 src/TNL/Matrices/SparseMatrix.hpp             | 155 ++++--------------
 src/TNL/Matrices/SparseMatrixView.h           |   2 +
 src/TNL/Matrices/SparseMatrixView.hpp         |  82 +++++----
 src/UnitTests/Matrices/Legacy/CMakeLists.txt  |  12 +-
 .../Matrices/Legacy/SparseMatrixCopyTest.h    |  36 ++--
 13 files changed, 174 insertions(+), 204 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h
index a0f5cd200..2ad849f97 100644
--- a/src/TNL/Containers/Segments/CSRView.h
+++ b/src/TNL/Containers/Segments/CSRView.h
@@ -41,6 +41,7 @@ class CSRView
       __cuda_callable__
       CSRView( const OffsetsView& offsets );
 
+      __cuda_callable__
       CSRView( const OffsetsView&& offsets );
 
       __cuda_callable__
@@ -110,6 +111,8 @@ class CSRView
       template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
       void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
 
+      CSRView& operator=( const CSRView& view );
+
       void save( File& file ) const;
 
       void load( File& file );
diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
index 2d2b58331..cc4d16fe6 100644
--- a/src/TNL/Containers/Segments/CSRView.hpp
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -171,9 +171,9 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
       const IndexType begin = offsetsView[ segmentIdx ];
       const IndexType end = offsetsView[ segmentIdx + 1 ];
       IndexType localIdx( 0 );
-      for( IndexType globalIdx = begin; globalIdx < end; globalIdx++  )
-         if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
-            break;
+      bool compute( true );
+      for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++  )
+         f( segmentIdx, localIdx++, globalIdx, compute, args... );
    };
    Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
 }
@@ -220,6 +220,15 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re
    this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
 }
 
+template< typename Device,
+          typename Index >
+CSRView< Device, Index >&
+CSRView< Device, Index >::
+operator=( const CSRView& view )
+{
+   this->offsets.copy( view.offsets );
+}
+
 template< typename Device,
           typename Index >
 void
diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp
index 719a349a9..c0d0b3721 100644
--- a/src/TNL/Containers/Segments/EllpackView.hpp
+++ b/src/TNL/Containers/Segments/EllpackView.hpp
@@ -202,9 +202,9 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
          const IndexType begin = segmentIdx * segmentSize;
          const IndexType end = begin + segmentSize;
          IndexType localIdx( 0 );
-         for( IndexType globalIdx = begin; globalIdx < end; globalIdx++  )
-            if( ! f( segmentIdx, localIdx++, globalIdx,  args... ) )
-               break;
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++  )
+            f( segmentIdx, localIdx++, globalIdx, compute, args... );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
    }
@@ -216,9 +216,9 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
          const IndexType begin = segmentIdx;
          const IndexType end = storageSize;
          IndexType localIdx( 0 );
-         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += alignedSize )
-            if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
-               break;
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += alignedSize )
+            f( segmentIdx, localIdx++, globalIdx, compute, args... );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
    }
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h
index 86745e7c0..c8c73c3f2 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.h
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.h
@@ -106,6 +106,8 @@ class SlicedEllpackView
       template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
       void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
 
+      SlicedEllpackView& operator=( const SlicedEllpackView& view );
+
       void save( File& file ) const;
 
       void load( File& file );
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
index 5f9cbdee3..98a3d9b81 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
@@ -14,6 +14,8 @@
 #include <TNL/Algorithms/ParallelFor.h>
 #include <TNL/Containers/Segments/SlicedEllpackView.h>
 
+#include "SlicedEllpackView.h"
+
 namespace TNL {
    namespace Containers {
       namespace Segments {
@@ -240,7 +242,7 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
    const auto sliceOffsets_view = this->sliceOffsets.getConstView();
    if( RowMajorOrder )
    {
-      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
          const IndexType sliceIdx = segmentIdx / SliceSize;
          const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
          const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
@@ -249,14 +251,13 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
          IndexType localIdx( 0 );
          bool compute( true );
          for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++  )
-            if( ! f( segmentIdx, localIdx++, globalIdx, compute, args... ) )
-               break;
+            f( segmentIdx, localIdx++, globalIdx, compute, args... );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
    }
    else
    {
-      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
          const IndexType sliceIdx = segmentIdx / SliceSize;
          const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
          const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
@@ -265,8 +266,7 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
          IndexType localIdx( 0 );
          bool compute( true );
          for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize )
-            if( ! f( segmentIdx, localIdx++, globalIdx, compute, args... ) )
-               break;
+            f( segmentIdx, localIdx++, globalIdx, compute, args... );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
    }
@@ -344,6 +344,21 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re
    this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
 }
 
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >&
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+operator=( const SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >& view )
+{
+   this->size = view.size;
+   this->alignedSize = view.alignedSize;
+   this->segmentsCount = view.segmentsCount;
+   this->sliceOffsets.copy( view.sliceOffsets );
+   this->sliceSegmentSizes.copy( view.sliceSegmentSizes );
+}
+
 template< typename Device,
           typename Index,
           bool RowMajorOrder,
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index b8fa96907..5504f6408 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -948,7 +948,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >
       if( std::is_same< DeviceType, Device_ >::value )
       {
          auto this_view = this->getView();
-         auto f = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable {
+         auto f = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value, bool& compute ) mutable {
             this_view.getRow( rowIdx ).setElement( columnIdx, value );
          };
          matrix.forAllRows( f );
@@ -971,7 +971,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >
 
             ////
             // Copy matrix elements into buffer
-            auto f1 = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable {
+            auto f1 = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value, bool& compute ) mutable {
                const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
                sourceValuesBuffer_view[ bufferIdx ] = value;
             };
@@ -981,7 +981,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >
 
             ////
             // Copy buffer to this matrix
-            auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable {
+            auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value, bool& compute ) mutable {
                const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
                value = destinationValuesBuffer_view[ bufferIdx ];
             };
diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp
index 527915d55..890606436 100644
--- a/src/TNL/Matrices/DenseMatrixView.hpp
+++ b/src/TNL/Matrices/DenseMatrixView.hpp
@@ -320,9 +320,8 @@ DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 forRows( IndexType first, IndexType last, Function& function ) const
 {
    const auto values_view = this->values.getConstView();
-   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool {
-      function( rowIdx, columnIdx, columnIdx, values_view[ globalIdx ] );
-      return true;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable {
+      function( rowIdx, columnIdx, columnIdx, values_view[ globalIdx ], compute );
    };
    this->segments.forSegments( first, last, f );
 
@@ -338,9 +337,8 @@ DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 forRows( IndexType first, IndexType last, Function& function )
 {
    auto values_view = this->values.getView();
-   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool {
-      function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
-      return true;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable {
+      function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ], compute );
    };
    this->segments.forSegments( first, last, f );
 
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 44883a124..43ea25bf5 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -108,11 +108,11 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       __cuda_callable__
       RowView getRow( const IndexType& rowIdx );
 
-      bool setElement( const IndexType row,
+      void setElement( const IndexType row,
                        const IndexType column,
                        const RealType& value );
 
-      bool addElement( const IndexType row,
+      void addElement( const IndexType row,
                        const IndexType column,
                        const RealType& value,
                        const RealType& thisElementMultiplicator );
@@ -172,7 +172,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       /**
        * \brief Assignment of exactly the same matrix type.
        * @param matrix
-       * @return 
+       * @return
        */
       SparseMatrix& operator=( const SparseMatrix& matrix );
 
@@ -181,12 +181,12 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        */
       template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder, typename RealAllocator_ >
       SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix );
-      
-      
+
+
       /**
        * \brief Assignment of any other matrix type.
        * @param matrix
-       * @return 
+       * @return
        */
       template< typename RHSMatrix >
       SparseMatrix& operator=( const RHSMatrix& matrix );
@@ -213,7 +213,9 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 
       IndexAllocator indexAllocator;
 
-      RealAllocator realAllocator;
+      //RealAllocator realAllocator;
+
+      ViewType view;
 };
 
 }  // namespace Conatiners
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 3f5636bb6..c94506084 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -84,7 +84,7 @@ auto
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getView() -> ViewType
 {
-   return ViewType( this->getRows(), 
+   return ViewType( this->getRows(),
                     this->getColumns(),
                     this->getValues().getView(),
                     this->columnIndexes.getView(),
@@ -166,6 +166,7 @@ setCompressedRowLengths( const RowsCapacitiesVector& rowsCapacities )
    this->values = ( RealType ) 0;
    this->columnIndexes.setSize( this->segments.getStorageSize() );
    this->columnIndexes = this->getPaddingIndex();
+   this->view = this->getView();
 }
 
 template< typename Real,
@@ -180,19 +181,7 @@ void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getCompressedRowLengths( Vector& rowLengths ) const
 {
-   rowLengths.setSize( this->getRows() );
-   rowLengths = 0;
-   auto rowLengths_view = rowLengths.getView();
-   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
-      return ( value != 0.0 );
-   };
-   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
-      aux += a;
-   };
-   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
-      rowLengths_view[ rowIdx ] = value;
-   };
-   this->allRowsReduction( fetch, reduce, keep, 0 );
+   this->view.getCompressedRowLengths( rowLengths );
 }
 
 template< typename Real,
@@ -221,12 +210,7 @@ Index
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getNumberOfNonzeroMatrixElements() const
 {
-   const auto columns_view = this->columnIndexes.getConstView();
-   const IndexType paddingIndex = this->getPaddingIndex();
-   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
-      return ( columns_view[ i ] != paddingIndex );
-   };
-   return Algorithms::Reduction< DeviceType >::reduce( this->columnIndexes.getSize(), std::plus<>{}, fetch, 0 );
+   this->view.getNumberOfNonzeroMatrixElements();
 }
 
 template< typename Real,
@@ -254,8 +238,7 @@ __cuda_callable__ auto
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getRow( const IndexType& rowIdx ) const -> const RowView
 {
-   TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." );
-   return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() );
+   return this->view.getRow( rowIdx );
 }
 
 template< typename Real,
@@ -269,8 +252,7 @@ __cuda_callable__ auto
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getRow( const IndexType& rowIdx ) -> RowView
 {
-   TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." );
-   return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() );
+   return this->view.getRow( rowIdx );
 }
 
 template< typename Real,
@@ -280,13 +262,13 @@ template< typename Real,
           template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
-bool
+void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 setElement( const IndexType row,
             const IndexType column,
             const RealType& value )
 {
-   return this->addElement( row, column, value, 0.0 );
+   this->view.setElement( row, column, value );
 }
 
 template< typename Real,
@@ -296,63 +278,14 @@ template< typename Real,
           template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
-bool
+void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 addElement( const IndexType row,
             const IndexType column,
             const RealType& value,
             const RealType& thisElementMultiplicator )
 {
-   TNL_ASSERT( row >= 0 && row < this->rows &&
-               column >= 0 && column < this->columns,
-               std::cerr << " row = " << row
-                    << " column = " << column
-                    << " this->rows = " << this->rows
-                    << " this->columns = " << this->columns );
-
-   const IndexType rowSize = this->segments.getSegmentSize( row );
-   IndexType col( this->getPaddingIndex() );
-   IndexType i;
-   IndexType globalIdx;
-   for( i = 0; i < rowSize; i++ )
-   {
-      globalIdx = this->segments.getGlobalIndex( row, i );
-      TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" );
-      col = this->columnIndexes.getElement( globalIdx );
-      if( col == column )
-      {
-         this->values.setElement( globalIdx, thisElementMultiplicator * this->values.getElement( globalIdx ) + value );
-         return true;
-      }
-      if( col == this->getPaddingIndex() || col > column )
-         break;
-   }
-   if( i == rowSize )
-      return false;
-   if( col == this->getPaddingIndex() )
-   {
-      this->columnIndexes.setElement( globalIdx, column );
-      this->values.setElement( globalIdx, value );
-      return true;
-   }
-   else
-   {
-      IndexType j = rowSize - 1;
-      while( j > i )
-      {
-         const IndexType globalIdx1 = this->segments.getGlobalIndex( row, j );
-         const IndexType globalIdx2 = this->segments.getGlobalIndex( row, j - 1 );
-         TNL_ASSERT_LT( globalIdx1, this->columnIndexes.getSize(), "" );
-         TNL_ASSERT_LT( globalIdx2, this->columnIndexes.getSize(), "" );
-         this->columnIndexes.setElement( globalIdx1, this->columnIndexes.getElement( globalIdx2 ) );
-         this->values.setElement( globalIdx1, this->values.getElement( globalIdx2 ) );
-         j--;
-      }
-
-      this->columnIndexes.setElement( globalIdx, column );
-      this->values.setElement( globalIdx, value );
-      return true;
-   }
+   this->view.addElement( row, column, value, thisElementMultiplicator );
 }
 
 template< typename Real,
@@ -367,16 +300,7 @@ SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAll
 getElement( const IndexType row,
             const IndexType column ) const
 {
-   const IndexType rowSize = this->segments.getSegmentSize( row );
-   for( IndexType i = 0; i < rowSize; i++ )
-   {
-      const IndexType globalIdx = this->segments.getGlobalIndex( row, i );
-      TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" );
-      const IndexType col = this->columnIndexes.getElement( globalIdx );
-      if( col == column )
-         return this->values.getElement( globalIdx );
-   }
-   return 0.0;
+   return this->view.getElement( row, column );
 }
 
 template< typename Real,
@@ -393,7 +317,7 @@ SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAll
 rowVectorProduct( const IndexType row,
                   const Vector& vector ) const
 {
-
+   this->view.rowVectorProduct( row, vector );
 }
 
 template< typename Real,
@@ -412,7 +336,8 @@ vectorProduct( const InVector& inVector,
                const RealType& matrixMultiplicator,
                const RealType& inVectorAddition ) const
 {
-   TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." );
+   this->view.vectorProduct( inVector, outVector, matrixMultiplicator, inVectorAddition );
+   /*TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." );
    TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." );
 
    const auto inVectorView = inVector.getConstView();
@@ -433,7 +358,7 @@ vectorProduct( const InVector& inVector,
    auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
       outVectorView[ row ] = value;
    };
-   this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
+   this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );*/
 }
 
 template< typename Real,
@@ -448,7 +373,8 @@ void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const
 {
-   const auto columns_view = this->columnIndexes.getConstView();
+   this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
+   /*const auto columns_view = this->columnIndexes.getConstView();
    const auto values_view = this->values.getConstView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
    auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) ) {
@@ -457,7 +383,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
          return fetch( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
       return zero;
    };
-   this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );
+   this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );*/
 }
 
 template< typename Real,
@@ -487,7 +413,8 @@ void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 forRows( IndexType first, IndexType last, Function& function ) const
 {
-   const auto columns_view = this->columnIndexes.getConstView();
+   this->view.forRows( first, last, function );
+   /*const auto columns_view = this->columnIndexes.getConstView();
    const auto values_view = this->values.getConstView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool {
@@ -495,7 +422,7 @@ forRows( IndexType first, IndexType last, Function& function ) const
       return true;
    };
    this->segments.forSegments( first, last, f );
-
+    */
 }
 
 template< typename Real,
@@ -510,14 +437,15 @@ void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 forRows( IndexType first, IndexType last, Function& function )
 {
-   auto columns_view = this->columnIndexes.getView();
+   this->view.forRows( first, last, function );
+   /*auto columns_view = this->columnIndexes.getView();
    auto values_view = this->values.getView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool {
       function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ] );
       return true;
    };
-   this->segments.forSegments( first, last, f );
+   this->segments.forSegments( first, last, f );*/
 }
 
 template< typename Real,
@@ -653,7 +581,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >&
    if( std::is_same< DeviceType, RHSDeviceType >::value )
    {
       const auto segments_view = this->segments.getView();
-      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType columnIdx, RHSIndexType globalIndex, const RHSRealType& value ) mutable {
+      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType columnIdx, RHSIndexType globalIndex, const RHSRealType& value, bool& compute ) mutable {
          if( value != 0.0 )
          {
             IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, rowLocalIndexes_view[ rowIdx ]++ );
@@ -684,7 +612,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >&
 
          ////
          // Copy matrix elements into buffer
-         auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable {
+         auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
             const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
             matrixValuesBuffer_view[ bufferIdx ] = value;
          };
@@ -697,7 +625,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >&
          ////
          // Copy matrix elements from the buffer to the matrix
          const IndexType matrix_columns = this->getColumns();
-         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value  ) mutable {
+         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute  ) mutable {
             RealType inValue( 0.0 );
             IndexType bufferIdx, column( rowLocalIndexes_view[ rowIdx ] );
             while( inValue == 0.0 && column < matrix_columns )
@@ -723,7 +651,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >&
       //std::cerr << "This matrix = " << std::endl << *this << std::endl;
    }
    return *this;
-   
+
 }
 
 template< typename Real,
@@ -759,7 +687,7 @@ operator=( const RHSMatrix& matrix )
    if( std::is_same< DeviceType, RHSDeviceType >::value )
    {
       const auto segments_view = this->segments.getView();
-      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable {
+      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
          if( columnIndex != paddingIndex )
          {
             IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx );
@@ -793,7 +721,7 @@ operator=( const RHSMatrix& matrix )
 
          ////
          // Copy matrix elements into buffer
-         auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable {
+         auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
             if( columnIndex != paddingIndex )
             {
                const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
@@ -810,7 +738,7 @@ operator=( const RHSMatrix& matrix )
 
          ////
          // Copy matrix elements from the buffer to the matrix
-         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value  ) mutable {
+         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable {
             const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
             const IndexType column = thisColumnsBuffer_view[ bufferIdx ];
             if( column != paddingIndex )
@@ -819,7 +747,7 @@ operator=( const RHSMatrix& matrix )
                value = thisValuesBuffer_view[ bufferIdx ];
             }
          };
-         this->forRows( baseRow, lastRow, f2 );
+         //this->forRows( baseRow, lastRow, f2 );
          baseRow += bufferRowsCount;
       }
       //std::cerr << "This matrix = " << std::endl << *this << std::endl;
@@ -838,9 +766,7 @@ void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 save( File& file ) const
 {
-   Matrix< RealType, DeviceType, IndexType >::save( file );
-   file << this->columnIndexes;
-   this->segments.save( file );
+   this->view.save( file );
 }
 
 template< typename Real,
@@ -898,20 +824,7 @@ void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 print( std::ostream& str ) const
 {
-   for( IndexType row = 0; row < this->getRows(); row++ )
-   {
-      str <<"Row: " << row << " -> ";
-      const IndexType rowLength = this->segments.getSegmentSize( row );
-      for( IndexType i = 0; i < rowLength; i++ )
-      {
-         const IndexType globalIdx = this->segments.getGlobalIndex( row, i );
-         const IndexType column = this->columnIndexes.getElement( globalIdx );
-         if( column == this->getPaddingIndex() )
-            break;
-         str << " Col:" << column << "->" << this->values.getElement( globalIdx ) << "\t";
-      }
-      str << std::endl;
-   }
+   this->view.print( str );
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h
index aba3b4642..7168e1e8e 100644
--- a/src/TNL/Matrices/SparseMatrixView.h
+++ b/src/TNL/Matrices/SparseMatrixView.h
@@ -143,6 +143,8 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
                                 Vector2& x,
                                 const RealType& omega = 1.0 ) const;
 
+      SparseMatrixView& operator=( const SparseMatrixView& matrix );
+
       void save( File& file ) const;
 
       void save( const String& fileName ) const;
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index 3b192b4e9..055a1d60e 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -118,7 +118,7 @@ getCompressedRowLengths( Vector& rowLengths ) const
    rowLengths.setSize( this->getRows() );
    rowLengths = 0;
    auto rowLengths_view = rowLengths.getView();
-   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
       return ( value != 0.0 );
    };
    auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
@@ -159,20 +159,6 @@ getNumberOfNonzeroMatrixElements() const
    return Algorithms::Reduction< DeviceType >::reduce( this->columnIndexes.getSize(), std::plus<>{}, fetch, 0 );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
-void
-SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-reset()
-{
-   Matrix< Real, Device, Index >::reset();
-   this->columnIndexes.reset();
-
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -225,12 +211,10 @@ addElement( const IndexType row,
             const RealType& value,
             const RealType& thisElementMultiplicator )
 {
-   TNL_ASSERT( row >= 0 && row < this->rows &&
-               column >= 0 && column < this->columns,
-               std::cerr << " row = " << row
-                    << " column = " << column
-                    << " this->rows = " << this->rows
-                    << " this->columns = " << this->columns );
+   TNL_ASSERT_GE( row, 0, "Sparse matrix row index cannot be negative." );
+   TNL_ASSERT_LT( row, this->getRows(), "Sparse matrix row index is larger than number of matrix rows." );
+   TNL_ASSERT_GE( column, 0, "Sparse matrix column index cannot be negative." );
+   TNL_ASSERT_LT( column, this->getColumns(), "Sparse matrix column index is larger than number of matrix columns." );
 
    const IndexType rowSize = this->segments.getSegmentSize( row );
    IndexType col( this->getPaddingIndex() );
@@ -291,6 +275,11 @@ SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
 getElement( const IndexType row,
             const IndexType column ) const
 {
+   TNL_ASSERT_GE( row, 0, "Sparse matrix row index cannot be negative." );
+   TNL_ASSERT_LT( row, this->getRows(), "Sparse matrix row index is larger than number of matrix rows." );
+   TNL_ASSERT_GE( column, 0, "Sparse matrix column index cannot be negative." );
+   TNL_ASSERT_LT( column, this->getColumns(), "Sparse matrix column index is larger than number of matrix columns." );
+
    const IndexType rowSize = this->segments.getSegmentSize( row );
    for( IndexType i = 0; i < rowSize; i++ )
    {
@@ -332,11 +321,34 @@ vectorProduct( const InVector& inVector,
                const RealType& matrixMultiplicator,
                const RealType& inVectorAddition ) const
 {
+   TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." );
+   TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." );
+
    const auto inVectorView = inVector.getConstView();
    auto outVectorView = outVector.getView();
    const auto valuesView = this->values.getConstView();
    const auto columnIndexesView = this->columnIndexes.getConstView();
    const IndexType paddingIndex = this->getPaddingIndex();
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> RealType {
+      const IndexType column = columnIndexesView[ globalIdx ];
+      compute = ( column != paddingIndex );
+      if( ! compute )
+         return 0.0;
+      return valuesView[ globalIdx ] * inVectorView[ column ];
+   };
+   auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+      sum += value;
+   };
+   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      outVectorView[ row ] = value;
+   };
+   this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
+
+   /*const auto inVectorView = inVector.getConstView();
+   auto outVectorView = outVector.getView();
+   const auto valuesView = this->values.getConstView();
+   const auto columnIndexesView = this->columnIndexes.getConstView();
+   const IndexType paddingIndex = this->getPaddingIndex();
    auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset, bool& compute ) -> RealType {
       const IndexType column = columnIndexesView[ offset ];
       compute = ( column != paddingIndex );
@@ -351,6 +363,7 @@ vectorProduct( const InVector& inVector,
       outVectorView[ row ] = value;
    };
    this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
+   */
 }
 
 template< typename Real,
@@ -366,10 +379,10 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
    const auto columns_view = this->columnIndexes.getConstView();
    const auto values_view = this->values.getConstView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
-   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType globalIdx ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
+   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) ) {
       IndexType columnIdx = columns_view[ globalIdx ];
       if( columnIdx != paddingIndex_ )
-         return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
+         return fetch( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
       return zero;
    };
    this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );
@@ -401,8 +414,8 @@ forRows( IndexType first, IndexType last, Function& function ) const
    const auto columns_view = this->columnIndexes.getConstView();
    const auto values_view = this->values.getConstView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
-   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool {
-      function( rowIdx, localIdx, globalIdx );
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> bool {
+      function( rowIdx, localIdx, columns_view[ globalIdx ], globalIdx, compute );
       return true;
    };
    this->segments.forSegments( first, last, f );
@@ -421,9 +434,8 @@ forRows( IndexType first, IndexType last, Function& function )
    auto columns_view = this->columnIndexes.getView();
    auto values_view = this->values.getView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
-   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool {
-      function( rowIdx, localIdx, globalIdx );
-      return true;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable {
+      function( rowIdx, localIdx, columns_view[ globalIdx ], globalIdx, compute );
    };
    this->segments.forSegments( first, last, f );
 }
@@ -501,6 +513,20 @@ performSORIteration( const Vector1& b,
    return false;
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >&
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+operator=( const SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >& matrix )
+{
+   MatrixView< Real, Device, Index >::operator=( matrix );
+   this->columnIndexes.copy( matrix.columnIndexes );
+   this->segments = matrix.segments;
+}
+
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/UnitTests/Matrices/Legacy/CMakeLists.txt b/src/UnitTests/Matrices/Legacy/CMakeLists.txt
index 4320b6c1f..46c6be2cd 100644
--- a/src/UnitTests/Matrices/Legacy/CMakeLists.txt
+++ b/src/UnitTests/Matrices/Legacy/CMakeLists.txt
@@ -1,6 +1,6 @@
 IF( BUILD_CUDA )
-   CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
+   #CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   #TARGET_LINK_LIBRARIES( Legacy_SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
 
    CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest SparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
@@ -24,9 +24,9 @@ IF( BUILD_CUDA )
    TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
 
 ELSE(  BUILD_CUDA )
-   ADD_EXECUTABLE( Legacy_SparseMatrixCopyTest SparseMatrixCopyTest.cpp )
-   TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
+   #ADD_EXECUTABLE( Legacy_SparseMatrixCopyTest SparseMatrixCopyTest.cpp )
+   #TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
+   #TARGET_LINK_LIBRARIES( Legacy_SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
 
    ADD_EXECUTABLE( Legacy_SparseMatrixTest SparseMatrixTest.cpp )
    TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
@@ -59,7 +59,7 @@ ELSE(  BUILD_CUDA )
 ENDIF( BUILD_CUDA )
 
 
-ADD_TEST( Legacy_SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} )
+#ADD_TEST( Legacy_SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( Legacy_SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 # TODO: Uncomment the following when AdEllpack works
 #ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} )
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h
index 7069fd777..aece39d9a 100644
--- a/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h
@@ -18,14 +18,14 @@
 #include <TNL/Containers/Segments/Ellpack.h>
 #include <TNL/Containers/Segments/SlicedEllpack.h>
 
-/*using CSR_host = TNL::Matrices::CSR< int, TNL::Devices::Host, int >;
+using CSR_host = TNL::Matrices::CSR< int, TNL::Devices::Host, int >;
 using CSR_cuda = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >;
 using E_host = TNL::Matrices::Ellpack< int, TNL::Devices::Host, int >;
 using E_cuda = TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, int >;
 using SE_host = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, int, 2 >;
-using SE_cuda = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int, 2 >;*/
+using SE_cuda = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int, 2 >;
 
-template< typename Device, typename Index, typename IndexAllocator >
+/*template< typename Device, typename Index, typename IndexAllocator >
 using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >;
 
 template< typename Device, typename Index, typename IndexAllocator >
@@ -36,10 +36,10 @@ using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL:
 using E_host   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
 using E_cuda   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
 using SE_host  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;
-using SE_cuda  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;
+using SE_cuda  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;*/
 
 
-#ifdef HAVE_GTEST 
+#ifdef HAVE_GTEST
 #include <gtest/gtest.h>
 
 /*
@@ -98,7 +98,7 @@ void setupUnevenRowSizeMatrix( Matrix& m )
 
     m.setElement( 7, 0, value++ );   // 7th row
 
-    for( int i = 0; i < cols - 1; i++ )  // 8th row 
+    for( int i = 0; i < cols - 1; i++ )  // 8th row
         m.setElement( 8, i, value++ );
 
     m.setElement( 9, 5, value++ );   // 9th row
@@ -158,21 +158,21 @@ void checkUnevenRowSizeMatrix( Matrix& m )
    EXPECT_EQ( m.getElement( 6, 3 ),  0 );
    EXPECT_EQ( m.getElement( 6, 4 ),  0 );
    EXPECT_EQ( m.getElement( 6, 5 ),  0 );
-   
+
    EXPECT_EQ( m.getElement( 7, 0 ), 22 );
    EXPECT_EQ( m.getElement( 7, 1 ),  0 );
    EXPECT_EQ( m.getElement( 7, 2 ),  0 );
    EXPECT_EQ( m.getElement( 7, 3 ),  0 );
    EXPECT_EQ( m.getElement( 7, 4 ),  0 );
    EXPECT_EQ( m.getElement( 7, 5 ),  0 );
-   
+
    EXPECT_EQ( m.getElement( 8, 0 ), 23 );
    EXPECT_EQ( m.getElement( 8, 1 ), 24 );
    EXPECT_EQ( m.getElement( 8, 2 ), 25 );
    EXPECT_EQ( m.getElement( 8, 3 ), 26 );
    EXPECT_EQ( m.getElement( 8, 4 ), 27 );
    EXPECT_EQ( m.getElement( 8, 5 ),  0 );
-   
+
    EXPECT_EQ( m.getElement( 9, 0 ),  0 );
    EXPECT_EQ( m.getElement( 9, 1 ),  0 );
    EXPECT_EQ( m.getElement( 9, 2 ),  0 );
@@ -205,7 +205,7 @@ void setupAntiTriDiagMatrix( Matrix& m )
     rowLengths.setElement( 0, 4);
     rowLengths.setElement( 1,  4 );
     m.setCompressedRowLengths( rowLengths );
-    
+
     int value = 1;
     for( int i = 0; i < rows; i++ )
         for( int j = cols - 1; j > 2; j-- )
@@ -396,39 +396,39 @@ void testCopyAssignment()
 template< typename Matrix1, typename Matrix2 >
 void testConversion()
 {
-    
+
    {
         SCOPED_TRACE("Tri Diagonal Matrix");
-        
+
         Matrix1 triDiag1;
         setupTriDiagMatrix( triDiag1 );
         checkTriDiagMatrix( triDiag1 );
-        
+
         Matrix2 triDiag2;
         //TNL::Matrices::copySparseMatrix( triDiag2, triDiag1 );
         triDiag2 = triDiag1;
         checkTriDiagMatrix( triDiag2 );
    }
-   
+
    {
         SCOPED_TRACE("Anti Tri Diagonal Matrix");
-                
+
         Matrix1 antiTriDiag1;
         setupAntiTriDiagMatrix( antiTriDiag1 );
         checkAntiTriDiagMatrix( antiTriDiag1 );
-        
+
         Matrix2 antiTriDiag2;
         //TNL::Matrices::copySparseMatrix( antiTriDiag2, antiTriDiag1 );
         antiTriDiag2 = antiTriDiag1;
         checkAntiTriDiagMatrix( antiTriDiag2 );
    }
-   
+
    {
         SCOPED_TRACE("Uneven Row Size Matrix");
         Matrix1 unevenRowSize1;
         setupUnevenRowSizeMatrix( unevenRowSize1 );
         checkUnevenRowSizeMatrix( unevenRowSize1 );
-        
+
         Matrix2 unevenRowSize2;
         //TNL::Matrices::copySparseMatrix( unevenRowSize2, unevenRowSize1 );
         unevenRowSize2 = unevenRowSize1;
-- 
GitLab


From 5927f064b6b9f06fa6c1e434d6bd1488c35966a4 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Fri, 17 Jan 2020 15:51:31 +0100
Subject: [PATCH 100/179] Fixed sparse matrix assignment operator.

---
 src/TNL/Matrices/Dense.hpp                    |  1 +
 src/TNL/Matrices/SparseMatrix.hpp             | 12 ++++----
 src/TNL/Matrices/SparseMatrixView.hpp         |  4 +--
 src/UnitTests/Matrices/SparseMatrixCopyTest.h | 28 +++++++++----------
 4 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 5504f6408..21ae1bce6 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -990,6 +990,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >
          }
       }
    }
+   this->view = this->getView();
    return *this;
 }
 
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index c94506084..14495ad3d 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -542,6 +542,7 @@ operator=( const SparseMatrix& matrix )
    Matrix< Real, Device, Index >::operator=( matrix );
    this->columnIndexes = matrix.columnIndexes;
    this->segments = matrix.segments;
+   this->view = this->getView();
    return *this;
 }
 
@@ -581,7 +582,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >&
    if( std::is_same< DeviceType, RHSDeviceType >::value )
    {
       const auto segments_view = this->segments.getView();
-      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType columnIdx, RHSIndexType globalIndex, const RHSRealType& value, bool& compute ) mutable {
+      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable {
          if( value != 0.0 )
          {
             IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, rowLocalIndexes_view[ rowIdx ]++ );
@@ -650,6 +651,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >&
       }
       //std::cerr << "This matrix = " << std::endl << *this << std::endl;
    }
+   this->view = this->getView();
    return *this;
 
 }
@@ -684,7 +686,7 @@ operator=( const RHSMatrix& matrix )
    auto values_view = this->values.getView();
    columns_view = paddingIndex;
 
-   if( std::is_same< DeviceType, RHSDeviceType >::value )
+   /*if( std::is_same< DeviceType, RHSDeviceType >::value )
    {
       const auto segments_view = this->segments.getView();
       auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
@@ -697,7 +699,7 @@ operator=( const RHSMatrix& matrix )
       };
       matrix.forAllRows( f );
    }
-   else
+   else*/
    {
       const IndexType maxRowLength = max( rowLengths );
       const IndexType bufferRowsCount( 128 );
@@ -747,11 +749,11 @@ operator=( const RHSMatrix& matrix )
                value = thisValuesBuffer_view[ bufferIdx ];
             }
          };
-         //this->forRows( baseRow, lastRow, f2 );
+         this->forRows( baseRow, lastRow, f2 );
          baseRow += bufferRowsCount;
       }
-      //std::cerr << "This matrix = " << std::endl << *this << std::endl;
    }
+   this->view = this->getView();
    return *this;
 }
 
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index 055a1d60e..965a51b8b 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -415,7 +415,7 @@ forRows( IndexType first, IndexType last, Function& function ) const
    const auto values_view = this->values.getConstView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> bool {
-      function( rowIdx, localIdx, columns_view[ globalIdx ], globalIdx, compute );
+      function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute );
       return true;
    };
    this->segments.forSegments( first, last, f );
@@ -435,7 +435,7 @@ forRows( IndexType first, IndexType last, Function& function )
    auto values_view = this->values.getView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable {
-      function( rowIdx, localIdx, columns_view[ globalIdx ], globalIdx, compute );
+      function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute );
    };
    this->segments.forSegments( first, last, f );
 }
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index 8677443b2..e9898bb39 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -40,7 +40,7 @@ using SE_host  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL:
 using SE_cuda  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;
 
 
-#ifdef HAVE_GTEST 
+#ifdef HAVE_GTEST
 #include <gtest/gtest.h>
 
 /*
@@ -99,7 +99,7 @@ void setupUnevenRowSizeMatrix( Matrix& m )
 
     m.setElement( 7, 0, value++ );   // 7th row
 
-    for( int i = 0; i < cols - 1; i++ )  // 8th row 
+    for( int i = 0; i < cols - 1; i++ )  // 8th row
         m.setElement( 8, i, value++ );
 
     m.setElement( 9, 5, value++ );   // 9th row
@@ -159,21 +159,21 @@ void checkUnevenRowSizeMatrix( Matrix& m )
    EXPECT_EQ( m.getElement( 6, 3 ),  0 );
    EXPECT_EQ( m.getElement( 6, 4 ),  0 );
    EXPECT_EQ( m.getElement( 6, 5 ),  0 );
-   
+
    EXPECT_EQ( m.getElement( 7, 0 ), 22 );
    EXPECT_EQ( m.getElement( 7, 1 ),  0 );
    EXPECT_EQ( m.getElement( 7, 2 ),  0 );
    EXPECT_EQ( m.getElement( 7, 3 ),  0 );
    EXPECT_EQ( m.getElement( 7, 4 ),  0 );
    EXPECT_EQ( m.getElement( 7, 5 ),  0 );
-   
+
    EXPECT_EQ( m.getElement( 8, 0 ), 23 );
    EXPECT_EQ( m.getElement( 8, 1 ), 24 );
    EXPECT_EQ( m.getElement( 8, 2 ), 25 );
    EXPECT_EQ( m.getElement( 8, 3 ), 26 );
    EXPECT_EQ( m.getElement( 8, 4 ), 27 );
    EXPECT_EQ( m.getElement( 8, 5 ),  0 );
-   
+
    EXPECT_EQ( m.getElement( 9, 0 ),  0 );
    EXPECT_EQ( m.getElement( 9, 1 ),  0 );
    EXPECT_EQ( m.getElement( 9, 2 ),  0 );
@@ -206,7 +206,7 @@ void setupAntiTriDiagMatrix( Matrix& m )
     rowLengths.setElement( 0, 4);
     rowLengths.setElement( 1,  4 );
     m.setCompressedRowLengths( rowLengths );
-    
+
     int value = 1;
     for( int i = 0; i < rows; i++ )
         for( int j = cols - 1; j > 2; j-- )
@@ -397,39 +397,37 @@ void testCopyAssignment()
 template< typename Matrix1, typename Matrix2 >
 void testConversion()
 {
-    
    {
         SCOPED_TRACE("Tri Diagonal Matrix");
-        
+
         Matrix1 triDiag1;
         setupTriDiagMatrix( triDiag1 );
         checkTriDiagMatrix( triDiag1 );
-        
+
         Matrix2 triDiag2;
-        //TNL::Matrices::copySparseMatrix( triDiag2, triDiag1 );
         triDiag2 = triDiag1;
         checkTriDiagMatrix( triDiag2 );
    }
-   
+
    {
         SCOPED_TRACE("Anti Tri Diagonal Matrix");
-                
+
         Matrix1 antiTriDiag1;
         setupAntiTriDiagMatrix( antiTriDiag1 );
         checkAntiTriDiagMatrix( antiTriDiag1 );
-        
+
         Matrix2 antiTriDiag2;
         //TNL::Matrices::copySparseMatrix( antiTriDiag2, antiTriDiag1 );
         antiTriDiag2 = antiTriDiag1;
         checkAntiTriDiagMatrix( antiTriDiag2 );
    }
-   
+
    {
         SCOPED_TRACE("Uneven Row Size Matrix");
         Matrix1 unevenRowSize1;
         setupUnevenRowSizeMatrix( unevenRowSize1 );
         checkUnevenRowSizeMatrix( unevenRowSize1 );
-        
+
         Matrix2 unevenRowSize2;
         //TNL::Matrices::copySparseMatrix( unevenRowSize2, unevenRowSize1 );
         unevenRowSize2 = unevenRowSize1;
-- 
GitLab


From c2514bc6b8b76a13d73f947ae5d7a67d4d7204c5 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Fri, 17 Jan 2020 17:40:18 +0100
Subject: [PATCH 101/179] Debuging assignment of tridiagonal matrix to sparse
 matrix.

---
 src/TNL/Matrices/SparseMatrix.h               |   2 +-
 src/TNL/Matrices/SparseMatrix.hpp             |  59 +++++++---
 src/TNL/Matrices/TridiagonalMatrixRowView.hpp |   2 +-
 src/TNL/Matrices/TridiagonalMatrixView.h      |   2 +
 src/TNL/Matrices/TridiagonalMatrixView.hpp    |  17 +--
 src/UnitTests/Matrices/SparseMatrixCopyTest.h | 105 +++++++++++++++++-
 6 files changed, 155 insertions(+), 32 deletions(-)

diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 43ea25bf5..26d5d2d84 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -73,7 +73,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
                     const RealAllocatorType& realAllocator = RealAllocatorType(),
                     const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
 
-      ViewType getView();
+      ViewType getView() const; // TODO: remove const
 
       ConstViewType getConstView() const;
 
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 14495ad3d..6d3d9d8b8 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -82,13 +82,13 @@ template< typename Real,
           typename IndexAllocator >
 auto
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-getView() -> ViewType
+getView() const -> ViewType
 {
    return ViewType( this->getRows(),
                     this->getColumns(),
-                    this->getValues().getView(),
-                    this->columnIndexes.getView(),
-                    this->segments.getView() );
+                    const_cast< SparseMatrix* >( this )->getValues().getView(),  // TODO: remove const_cast
+                    const_cast< SparseMatrix* >( this )->columnIndexes.getView(),
+                    const_cast< SparseMatrix* >( this )->segments.getView() );
 }
 
 template< typename Real,
@@ -624,7 +624,8 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >&
          thisValuesBuffer_view = matrixValuesBuffer_view;
 
          ////
-         // Copy matrix elements from the buffer to the matrix
+         // Copy matrix elements from the buffer to the matrix and ignoring
+         // zero matrix elements.
          const IndexType matrix_columns = this->getColumns();
          auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute  ) mutable {
             RealType inValue( 0.0 );
@@ -672,34 +673,41 @@ operator=( const RHSMatrix& matrix )
    using RHSRealType = typename RHSMatrix::RealType;
    using RHSDeviceType = typename RHSMatrix::DeviceType;
    using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType;
-   using RHSIndexAllocatorType = typename RHSMatrix::IndexAllocatorType;
+   using RHSIndexAllocatorType = typename Allocators::Default< RHSDeviceType >::template Allocator< RHSIndexType >;
 
-   typename RHSMatrix::RowsCapacitiesType rowLengths;
+   Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths;
    matrix.getCompressedRowLengths( rowLengths );
    this->setDimensions( matrix.getRows(), matrix.getColumns() );
    this->setCompressedRowLengths( rowLengths );
+   Containers::Vector< IndexType, DeviceType, IndexType > rowLocalIndexes( matrix.getRows() );
+   rowLocalIndexes = 0;
+
 
    // TODO: use getConstView when it works
    const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView();
    const IndexType paddingIndex = this->getPaddingIndex();
    auto columns_view = this->columnIndexes.getView();
    auto values_view = this->values.getView();
+   auto rowLocalIndexes_view = rowLocalIndexes.getView();
    columns_view = paddingIndex;
 
-   /*if( std::is_same< DeviceType, RHSDeviceType >::value )
+   if( std::is_same< DeviceType, RHSDeviceType >::value )
    {
       const auto segments_view = this->segments.getView();
-      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
+      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
+         RealType inValue( 0.0 );
+         IndexType localIdx( rowLocalIndexes_view[ rowIdx ] );
          if( columnIndex != paddingIndex )
          {
-            IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx );
+            IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx++ );
             columns_view[ thisGlobalIdx ] = columnIndex;
             values_view[ thisGlobalIdx ] = value;
+            rowLocalIndexes_view[ rowIdx ] = localIdx;
          }
       };
       matrix.forAllRows( f );
    }
-   else*/
+   else
    {
       const IndexType maxRowLength = max( rowLengths );
       const IndexType bufferRowsCount( 128 );
@@ -739,14 +747,29 @@ operator=( const RHSMatrix& matrix )
          thisColumnsBuffer_view = matrixColumnsBuffer_view;
 
          ////
-         // Copy matrix elements from the buffer to the matrix
-         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable {
-            const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
-            const IndexType column = thisColumnsBuffer_view[ bufferIdx ];
-            if( column != paddingIndex )
+         // Copy matrix elements from the buffer to the matrix and ignoring
+         // zero matrix elements
+         const IndexType matrix_columns = this->getColumns();
+         auto matrix_view = matrix.getView();
+         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx_, IndexType& columnIndex, RealType& value, bool& compute ) mutable {
+            RealType inValue( 0.0 );
+            IndexType bufferIdx, localIdx( rowLocalIndexes_view[ rowIdx ] );
+            auto matrixRow = matrix_view.getRow( rowIdx );
+            while( inValue == 0.0 && localIdx < matrixRow.getSize() ) //matrix_columns )
             {
-               columnIndex = column;
-               value = thisValuesBuffer_view[ bufferIdx ];
+               bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx++;
+               inValue = thisValuesBuffer_view[ bufferIdx ];
+            }
+            rowLocalIndexes_view[ rowIdx ] = localIdx;
+            if( inValue == 0.0 )
+            {
+               columnIndex = paddingIndex;
+               value = 0.0;
+            }
+            else
+            {
+               columnIndex = thisColumnsBuffer_view[ bufferIdx ];//column - 1;
+               value = inValue;
             }
          };
          this->forRows( baseRow, lastRow, f2 );
diff --git a/src/TNL/Matrices/TridiagonalMatrixRowView.hpp b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp
index ba60876b9..80fc1a26d 100644
--- a/src/TNL/Matrices/TridiagonalMatrixRowView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp
@@ -29,7 +29,7 @@ auto
 TridiagonalMatrixRowView< ValuesView, Indexer >::
 getSize() const -> IndexType
 {
-   return indexer.getRowSize();
+   return indexer.getRowSize( rowIdx );
 }
 
 template< typename ValuesView, typename Indexer >
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h
index 7db517cbd..61b005c5a 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.h
+++ b/src/TNL/Matrices/TridiagonalMatrixView.h
@@ -75,8 +75,10 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
       template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
       bool operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;
 
+      __cuda_callable__
       RowView getRow( const IndexType& rowIdx );
 
+      __cuda_callable__
       const RowView getRow( const IndexType& rowIdx ) const;
 
       void setValue( const RealType& v );
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp
index d8fa6061c..7fc5fd6b7 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp
@@ -340,25 +340,26 @@ forRows( IndexType first, IndexType last, Function& function ) const
 {
    const auto values_view = this->values.getConstView();
    const auto indexer = this->indexer;
+   bool compute( true );
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
       if( rowIdx == 0 )
       {
-         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] );
-         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] );
+         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ], compute );
+         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ], compute );
       } 
       else if( rowIdx + 1 < indexer.getColumns() )
       {
-         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
-         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
-         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] );
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute );
+         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ], compute );
       }
       else if( rowIdx < indexer.getColumns() )
       {
-         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
-         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute );
       }
       else
-         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
    };
    Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index e9898bb39..b285f5e05 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -15,6 +15,8 @@
 #include <TNL/Matrices/SparseMatrix.h>
 #include <TNL/Matrices/MatrixType.h>
 #include <TNL/Matrices/Dense.h>
+#include <TNL/Matrices/Tridiagonal.h>
+#include <TNL/Matrices/Multidiagonal.h>
 #include <TNL/Containers/Segments/CSR.h>
 #include <TNL/Containers/Segments/Ellpack.h>
 #include <TNL/Containers/Segments/SlicedEllpack.h>
@@ -370,6 +372,7 @@ void testCopyAssignment()
 
       Matrix2 triDiag2;
       triDiag2 = triDiag1;
+      checkTriDiagMatrix( triDiag1 );
       checkTriDiagMatrix( triDiag2 );
    }
    {
@@ -390,6 +393,7 @@ void testCopyAssignment()
 
       Matrix2 unevenRowSize2;
       unevenRowSize2 = unevenRowSize1;
+
       checkUnevenRowSizeMatrix( unevenRowSize2 );
    }
 }
@@ -435,6 +439,62 @@ void testConversion()
    }
 }
 
+template< typename Matrix >
+void tridiagonalMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >;
+   using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   TridiagonalHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( rows, i + 2 ); j++ )
+         hostMatrix.setElement( i, j, i + j );
+
+   std::cerr << hostMatrix << std::endl;
+   Matrix matrix;
+   matrix = hostMatrix;
+   std::cerr << matrix << std::endl;
+   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 0, 3, 3, 3, 3, 3, 3, 3, 3, 2 };
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( abs( i - j ) > 1 )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+
+#ifdef HAVE_CUDA
+   TridiagonalCuda cudaMatrix( rows, columns );
+   cudaMatrix = hostMatrix;
+   /*for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( row, i + 1 ); j++ )
+         cudaMatrix.setElement( i, j, i + j );*/
+
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( abs( i - j ) > 1 )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+#endif
+
+}
+
 template< typename Matrix >
 void denseMatrixAssignment()
 {
@@ -469,10 +529,10 @@ void denseMatrixAssignment()
 
 #ifdef HAVE_CUDA
    DenseCuda cudaMatrix( rows, columns );
-   //cudaMatrix = hostMatrix;
-   for( IndexType i = 0; i < columns; i++ )
+   cudaMatrix = hostMatrix;
+   /*for( IndexType i = 0; i < columns; i++ )
       for( IndexType j = 0; j <= i; j++ )
-         cudaMatrix.setElement( i, j, i + j );
+         cudaMatrix.setElement( i, j, i + j );*/
 
    matrix = cudaMatrix;
    matrix.getCompressedRowLengths( rowCapacities );
@@ -487,7 +547,7 @@ void denseMatrixAssignment()
       }
 #endif
 }
-
+/*
 TEST( SparseMatrixCopyTest, CSR_HostToHost )
 {
    testCopyAssignment< CSR_host, CSR_host >();
@@ -619,6 +679,43 @@ TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda )
    testConversion< SE_cuda, E_cuda >();
 }
 #endif
+*/
+
+////
+// Tridiagonal matrix assignment test
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_host )
+{
+   tridiagonalMatrixAssignment< CSR_host >();
+}
+
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_host )
+{
+   tridiagonalMatrixAssignment< E_host >();
+}
+
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_host )
+{
+   tridiagonalMatrixAssignment< SE_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_cuda )
+{
+   tridiagonalMatrixAssignment< CSR_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_cuda )
+{
+   tridiagonalMatrixAssignment< E_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_cuda )
+{
+   tridiagonalMatrixAssignment< SE_cuda >();
+}
+#endif // HAVE_CUDA
+
+
 
 // Dense matrix assignment test
 TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_host )
-- 
GitLab


From 733401dcfd44ac508f43d1f5731bfc795226ddf0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 17 Jan 2020 22:51:01 +0100
Subject: [PATCH 102/179] Fixed tridiagonal to sparse matrix assignment.
 Debugging multidiagonal to sparse matrix assignment.

---
 .../Matrices/MultidiagonalMatrixRowView.hpp   |   2 +-
 src/TNL/Matrices/MultidiagonalMatrixView.hpp  |   3 +-
 src/TNL/Matrices/SparseMatrix.hpp             |   3 +-
 src/UnitTests/Matrices/SparseMatrixCopyTest.h | 117 ++++++++++++++----
 4 files changed, 98 insertions(+), 27 deletions(-)

diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp
index 88aae3f15..855b8463a 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp
+++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp
@@ -30,7 +30,7 @@ auto
 MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
 getSize() const -> IndexType
 {
-   return indexer.getRowSize();
+   return diagonalsShifts.getSize();//indexer.getRowSize( rowIdx );
 }
 
 template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
index 2839c997a..96312d03c 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
@@ -393,12 +393,13 @@ forRows( IndexType first, IndexType last, Function& function ) const
    const IndexType diagonalsCount = this->diagonalsShifts.getSize();
    const IndexType columns = this->getColumns();
    const auto indexer = this->indexer;
+   bool compute( true );
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
       for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ )
       {
          const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ];
          if( columnIdx >= 0 && columnIdx < columns )
-            function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ] );
+            function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ], compute );
       }
    };
    Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 6d3d9d8b8..66813a732 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -682,7 +682,6 @@ operator=( const RHSMatrix& matrix )
    Containers::Vector< IndexType, DeviceType, IndexType > rowLocalIndexes( matrix.getRows() );
    rowLocalIndexes = 0;
 
-
    // TODO: use getConstView when it works
    const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView();
    const IndexType paddingIndex = this->getPaddingIndex();
@@ -697,7 +696,7 @@ operator=( const RHSMatrix& matrix )
       auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
          RealType inValue( 0.0 );
          IndexType localIdx( rowLocalIndexes_view[ rowIdx ] );
-         if( columnIndex != paddingIndex )
+         if( value != 0.0 && columnIndex != paddingIndex )
          {
             IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx++ );
             columns_view[ thisGlobalIdx ] = columnIndex;
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index b285f5e05..7f29191b6 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -421,7 +421,6 @@ void testConversion()
         checkAntiTriDiagMatrix( antiTriDiag1 );
 
         Matrix2 antiTriDiag2;
-        //TNL::Matrices::copySparseMatrix( antiTriDiag2, antiTriDiag1 );
         antiTriDiag2 = antiTriDiag1;
         checkAntiTriDiagMatrix( antiTriDiag2 );
    }
@@ -433,7 +432,6 @@ void testConversion()
         checkUnevenRowSizeMatrix( unevenRowSize1 );
 
         Matrix2 unevenRowSize2;
-        //TNL::Matrices::copySparseMatrix( unevenRowSize2, unevenRowSize1 );
         unevenRowSize2 = unevenRowSize1;
         checkUnevenRowSizeMatrix( unevenRowSize2 );
    }
@@ -451,21 +449,19 @@ void tridiagonalMatrixAssignment()
 
    const IndexType rows( 10 ), columns( 10 );
    TridiagonalHost hostMatrix( rows, columns );
-   for( IndexType i = 0; i < columns; i++ )
-      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( rows, i + 2 ); j++ )
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( columns, i + 2 ); j++ )
          hostMatrix.setElement( i, j, i + j );
 
-   std::cerr << hostMatrix << std::endl;
    Matrix matrix;
    matrix = hostMatrix;
-   std::cerr << matrix << std::endl;
    using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
    RowCapacitiesType rowCapacities;
    matrix.getCompressedRowLengths( rowCapacities );
-   RowCapacitiesType exactRowLengths{ 0, 3, 3, 3, 3, 3, 3, 3, 3, 2 };
+   RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 };
    EXPECT_EQ( rowCapacities, exactRowLengths );
-   for( IndexType i = 0; i < columns; i++ )
-      for( IndexType j = 0; j < rows; j++ )
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
       {
          if( abs( i - j ) > 1 )
             EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
@@ -476,15 +472,11 @@ void tridiagonalMatrixAssignment()
 #ifdef HAVE_CUDA
    TridiagonalCuda cudaMatrix( rows, columns );
    cudaMatrix = hostMatrix;
-   /*for( IndexType i = 0; i < columns; i++ )
-      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( row, i + 1 ); j++ )
-         cudaMatrix.setElement( i, j, i + j );*/
-
    matrix = cudaMatrix;
    matrix.getCompressedRowLengths( rowCapacities );
    EXPECT_EQ( rowCapacities, exactRowLengths );
-   for( IndexType i = 0; i < columns; i++ )
-      for( IndexType j = 0; j < rows; j++ )
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
       {
          if( abs( i - j ) > 1 )
             EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
@@ -492,7 +484,58 @@ void tridiagonalMatrixAssignment()
             EXPECT_EQ( matrix.getElement( i, j ), i + j );
       }
 #endif
+}
 
+template< typename Matrix >
+void multidiagonalMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >;
+   using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+   using DiagonalsShiftsType = typename MultidiagonalHost::DiagonalsShiftsType;
+   DiagonalsShiftsType diagonals{ -4, -2, 0, 1, 3, 5 };
+
+   const IndexType rows( 10 ), columns( 10 );
+   MultidiagonalHost hostMatrix( rows, columns, diagonals );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+         if( diagonals.containsValue( i - j ) )
+            hostMatrix.setElement( i, j, i + j );
+
+   Matrix matrix;
+   matrix = hostMatrix;
+   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 };
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( diagonals.containsValue( i - j ) )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+
+#ifdef HAVE_CUDA
+   MultidiagonalCuda cudaMatrix( rows, columns, diagonals );
+   cudaMatrix = hostMatrix;
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( diagonals.containsValue( i - j ) > 1 )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+#endif
 }
 
 template< typename Matrix >
@@ -530,10 +573,6 @@ void denseMatrixAssignment()
 #ifdef HAVE_CUDA
    DenseCuda cudaMatrix( rows, columns );
    cudaMatrix = hostMatrix;
-   /*for( IndexType i = 0; i < columns; i++ )
-      for( IndexType j = 0; j <= i; j++ )
-         cudaMatrix.setElement( i, j, i + j );*/
-
    matrix = cudaMatrix;
    matrix.getCompressedRowLengths( rowCapacities );
    EXPECT_EQ( rowCapacities, exactRowLengths );
@@ -547,7 +586,7 @@ void denseMatrixAssignment()
       }
 #endif
 }
-/*
+
 TEST( SparseMatrixCopyTest, CSR_HostToHost )
 {
    testCopyAssignment< CSR_host, CSR_host >();
@@ -616,8 +655,8 @@ TEST( SparseMatrixCopyTest, SlicedEllpack_CudaToCuda )
 }
 #endif
 
-
-// test conversion between formats
+////
+// Test of conversion between formats
 TEST( SparseMatrixCopyTest, CSR_to_Ellpack_host )
 {
    testConversion< CSR_host, E_host >();
@@ -679,7 +718,6 @@ TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda )
    testConversion< SE_cuda, E_cuda >();
 }
 #endif
-*/
 
 ////
 // Tridiagonal matrix assignment test
@@ -715,8 +753,41 @@ TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_cuda )
 }
 #endif // HAVE_CUDA
 
+////
+// Multidiagonal matrix assignment test
+TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_CSR_host )
+{
+   multidiagonalMatrixAssignment< CSR_host >();
+}
+
+TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Ellpack_host )
+{
+   multidiagonalMatrixAssignment< E_host >();
+}
+
+TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_SlicedEllpack_host )
+{
+   multidiagonalMatrixAssignment< SE_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_CSR_cuda )
+{
+   multidiagonalMatrixAssignment< CSR_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Ellpack_cuda )
+{
+   multidiagonalMatrixAssignment< E_cuda >();
+}
 
+TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_SlicedEllpack_cuda )
+{
+   multidiagonalMatrixAssignment< SE_cuda >();
+}
+#endif // HAVE_CUDA
 
+////
 // Dense matrix assignment test
 TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_host )
 {
-- 
GitLab


From 27cb1cfa5e00fe44807e0626845ff147c6400a8d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sat, 18 Jan 2020 10:45:02 +0100
Subject: [PATCH 103/179] Fixed multidiagonal to sparse matrix assignment.

---
 src/UnitTests/Matrices/SparseMatrixCopyTest.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index 7f29191b6..6e1427ad1 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -502,7 +502,7 @@ void multidiagonalMatrixAssignment()
    MultidiagonalHost hostMatrix( rows, columns, diagonals );
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < columns; j++ )
-         if( diagonals.containsValue( i - j ) )
+         if( diagonals.containsValue( j - i ) )
             hostMatrix.setElement( i, j, i + j );
 
    Matrix matrix;
@@ -510,15 +510,15 @@ void multidiagonalMatrixAssignment()
    using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
    RowCapacitiesType rowCapacities;
    matrix.getCompressedRowLengths( rowCapacities );
-   RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 };
+   RowCapacitiesType exactRowLengths{ 3, 4, 5, 5, 6, 5, 5, 4, 4, 3 };
    EXPECT_EQ( rowCapacities, exactRowLengths );
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < columns; j++ )
       {
-         if( diagonals.containsValue( i - j ) )
-            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
-         else
+         if( diagonals.containsValue( j - i ) )
             EXPECT_EQ( matrix.getElement( i, j ), i + j );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
       }
 
 #ifdef HAVE_CUDA
@@ -530,10 +530,10 @@ void multidiagonalMatrixAssignment()
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < columns; j++ )
       {
-         if( diagonals.containsValue( i - j ) > 1 )
-            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
-         else
+         if( diagonals.containsValue( j - i ) )
             EXPECT_EQ( matrix.getElement( i, j ), i + j );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
       }
 #endif
 }
-- 
GitLab


From ba7b368151559d89f5eccb9e604e3e9d8314e0fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sun, 19 Jan 2020 20:33:19 +0100
Subject: [PATCH 104/179] Added command line argument --with-cxx-flags to build
 script.

---
 CMakeLists.txt | 2 +-
 build          | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 68252ba6a..7d1666163 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -82,7 +82,7 @@ set( CMAKE_CXX_STANDARD_REQUIRED ON )
 set( CMAKE_CXX_EXTENSIONS OFF )
 
 # set default build options
-set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" )
+set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WITH_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" )
 set( CMAKE_CXX_FLAGS_DEBUG "-g" )
 set( CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" )
 # pass -rdynamic only in Debug mode
diff --git a/build b/build
index 67492f02c..9deb12d10 100755
--- a/build
+++ b/build
@@ -62,6 +62,7 @@ do
         --with-tools=*                   ) WITH_TOOLS="${option#*=}" ;;
         --with-benchmarks=*              ) WITH_BENCHMARKS="${option#*=}" ;;
         --with-python=*                  ) WITH_PYTHON="${option#*=}" ;;
+        --with-cxx-flags=*               ) WITH_CXX_FLAGS="${option#*=}" ;;
         *                                )
            echo "Unknown option ${option}. Use --help for more information."
            exit 1 ;;
@@ -91,6 +92,7 @@ if [[ ${HELP} == "yes" ]]; then
     echo "   --with-tools=yes/no                   Compile the 'src/Tools' directory. 'yes' by default."
     echo "   --with-python=yes/no                  Compile the Python bindings. 'yes' by default."
     echo "   --with-benchmarks=yes/no              Compile the 'src/Benchmarks' directory. 'yes' by default."
+    echo "   --with-cxx-flags=FLAGS                Additional flags for C++ compiler."
     echo "   --cmake=CMAKE                         Path to cmake. 'cmake' by default."
     echo "   --verbose                             It enables verbose build."
     echo "   --root-dir=PATH                       Path to the TNL source code root dir."
@@ -142,6 +144,7 @@ cmake_command=(
          -DWITH_TOOLS=${WITH_TOOLS}
          -DWITH_PYTHON=${WITH_PYTHON}
          -DWITH_BENCHMARKS=${WITH_BENCHMARKS}
+         -DWITH_CXX_FLAGS=${WITH_CXX_FLAGS}
          -DDCMTK_DIR=${DCMTK_DIR}
 )
 
-- 
GitLab


From 3bfc83cce078241e3b656ebd06dcca10f85db09e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sun, 19 Jan 2020 20:33:43 +0100
Subject: [PATCH 105/179] Fixing MultidiagonalMatrix.

---
 src/TNL/Matrices/Multidiagonal.hpp           | 6 +++---
 src/TNL/Matrices/MultidiagonalMatrixView.hpp | 9 ++++++---
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp
index 7bc83f2d4..94470d3d1 100644
--- a/src/TNL/Matrices/Multidiagonal.hpp
+++ b/src/TNL/Matrices/Multidiagonal.hpp
@@ -668,7 +668,7 @@ operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllo
       if( std::is_same< Device, Device_ >::value )
       {
          const auto matrix_view = matrix.getView();
-         auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+         auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value, bool& compute ) mutable {
             value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
          };
          this->forAllRows( f );
@@ -695,7 +695,7 @@ operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllo
 
             ////
             // Copy matrix elements into buffer
-            auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable {
+            auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
                   const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
                   matrixValuesBuffer_view[ bufferIdx ] = value;
             };
@@ -707,7 +707,7 @@ operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllo
 
             ////
             // Copy matrix elements from the buffer to the matrix
-            auto f2 = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType localIdx, const IndexType columnIndex, RealType& value  ) mutable {
+            auto f2 = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType localIdx, const IndexType columnIndex, RealType& value, bool& compute  ) mutable {
                const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
                   value = thisValuesBuffer_view[ bufferIdx ];
             };
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
index 96312d03c..224368465 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
@@ -216,8 +216,10 @@ void
 MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 setValue( const RealType& v )
 {
+   // we dont do this->values = v here because it would set even elements 'outside' the matrix
+   // method getNumberOfNonzeroElements would not well
    const RealType newValue = v;
-   auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType columnIdx, RealType& value ) mutable {
+   auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType columnIdx, RealType& value, bool& compute ) mutable {
       value = newValue;
    };
    this->forAllRows( f );
@@ -419,12 +421,13 @@ forRows( IndexType first, IndexType last, Function& function )
    const IndexType diagonalsCount = this->diagonalsShifts.getSize();
    const IndexType columns = this->getColumns();
    const auto indexer = this->indexer;
+   bool compute( true );
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ )
+      for( IndexType localIdx = 0; localIdx < diagonalsCount && compute; localIdx++ )
       {
          const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ];
          if( columnIdx >= 0 && columnIdx < columns )
-            function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ] );
+            function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ], compute );
       }
    };
    Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
-- 
GitLab


From 7cc6eab9d627d5688c551dbfe588c6395d0e7553 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sun, 19 Jan 2020 21:38:58 +0100
Subject: [PATCH 106/179] Added DenseMatrixCopyTest.

---
 src/TNL/Matrices/Dense.hpp                    |   3 +-
 src/UnitTests/Matrices/CMakeLists.txt         |  24 +-
 .../Matrices/DenseMatrixCopyTest.cpp          |  11 +
 src/UnitTests/Matrices/DenseMatrixCopyTest.cu |  11 +
 src/UnitTests/Matrices/DenseMatrixCopyTest.h  | 633 ++++++++++++++++++
 5 files changed, 672 insertions(+), 10 deletions(-)
 create mode 100644 src/UnitTests/Matrices/DenseMatrixCopyTest.cpp
 create mode 100644 src/UnitTests/Matrices/DenseMatrixCopyTest.cu
 create mode 100644 src/UnitTests/Matrices/DenseMatrixCopyTest.h

diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 21ae1bce6..3d9ff102e 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -130,7 +130,8 @@ void
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
 {
-   this->setDimensions( rowLengths.getSize(), max( rowLengths ) );
+   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "" );
+   TNL_ASSERT_LE( max( rowLengths ), this->getColumns(), "" );
 }
 
 template< typename Real,
diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index 287495405..b19c8b705 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -1,15 +1,18 @@
 ADD_SUBDIRECTORY( Legacy )
 
 IF( BUILD_CUDA )
-   CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
-
    CUDA_ADD_EXECUTABLE( SparseMatrixTest SparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
+   CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
+
    CUDA_ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
+   CUDA_ADD_EXECUTABLE( DenseMatrixCopyTest DenseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( DenseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
+
    CUDA_ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
@@ -26,18 +29,22 @@ IF( BUILD_CUDA )
    TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack_segments ${GTEST_BOTH_LIBRARIES} )
 
 ELSE(  BUILD_CUDA )
-   ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp )
-   TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
-
    ADD_EXECUTABLE( SparseMatrixTest SparseMatrixTest.cpp )
    TARGET_COMPILE_OPTIONS( SparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
+   ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp )
+   TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
+
    ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp )
    TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
+   ADD_EXECUTABLE( DenseMatrixCopyTest DenseMatrixCopyTest.cpp )
+   TARGET_COMPILE_OPTIONS( DenseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( DenseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
+
    ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cpp )
    TARGET_COMPILE_OPTIONS( TridiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
@@ -60,9 +67,8 @@ ELSE(  BUILD_CUDA )
 
 ENDIF( BUILD_CUDA )
 
-
-ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( TridiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.cpp b/src/UnitTests/Matrices/DenseMatrixCopyTest.cpp
new file mode 100644
index 000000000..5cdd413af
--- /dev/null
+++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          DenseMatrixCopyTest.cpp  -  description
+                             -------------------
+    begin                : Jan 19, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "DenseMatrixCopyTest.h"
diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.cu b/src/UnitTests/Matrices/DenseMatrixCopyTest.cu
new file mode 100644
index 000000000..5fbd77efa
--- /dev/null
+++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          DenseMatrixCopyTest.cu  -  description
+                             -------------------
+    begin                : Jan 19, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "DenseMatrixCopyTest.h"
diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.h b/src/UnitTests/Matrices/DenseMatrixCopyTest.h
new file mode 100644
index 000000000..ef7809a6b
--- /dev/null
+++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.h
@@ -0,0 +1,633 @@
+/***************************************************************************
+                          DenseMatrixCopyTest.h -  description
+                             -------------------
+    begin                : Jan 19, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Matrices/Legacy/CSR.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
+
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Matrices/MatrixType.h>
+#include <TNL/Matrices/Dense.h>
+#include <TNL/Matrices/Tridiagonal.h>
+#include <TNL/Matrices/Multidiagonal.h>
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Containers/Segments/SlicedEllpack.h>
+
+template< typename Device, typename Index, typename IndexAllocator >
+using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >;
+
+template< typename Device, typename Index, typename IndexAllocator >
+using SlicedEllpackSegments = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >;
+
+using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >;
+using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >;
+using E_host   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
+using E_cuda   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
+using SE_host  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;
+using SE_cuda  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;
+using Dense_host               = TNL::Matrices::Dense< int, TNL::Devices::Host, int, false >;
+using Dense_host_RowMajorOrder = TNL::Matrices::Dense< int, TNL::Devices::Host, int, true >;
+using Dense_cuda               = TNL::Matrices::Dense< int, TNL::Devices::Cuda, int, false >;
+using Dense_cuda_RowMajorOrder = TNL::Matrices::Dense< int, TNL::Devices::Cuda, int, true >;
+
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+/*
+ * Sets up the following 10x6 sparse matrix:
+ *
+ *    /  1  2             \
+ *    |           3  4  5 |
+ *    |  6  7  8          |
+ *    |     9 10 11 12 13 |
+ *    | 14 15 16 17 18    |
+ *    | 19 20             |
+ *    | 21                |
+ *    | 22                |
+ *    | 23 24 25 26 27    |
+ *    \                28 /
+ */
+template< typename Matrix >
+void setupUnevenRowSizeMatrix( Matrix& m )
+{
+    const int rows = 10;
+    const int cols = 6;
+    m.setDimensions( rows, cols );
+
+    int value = 1;
+    for( int i = 0; i < cols - 4; i++ )  // 0th row
+        m.setElement( 0, i, value++ );
+
+    for( int i = 3; i < cols; i++ )      // 1st row
+        m.setElement( 1, i, value++ );
+
+    for( int i = 0; i < cols - 3; i++ )  // 2nd row
+        m.setElement( 2, i, value++ );
+
+    for( int i = 1; i < cols; i++ )      // 3rd row
+        m.setElement( 3, i, value++ );
+
+    for( int i = 0; i < cols - 1; i++ )  // 4th row
+        m.setElement( 4, i, value++ );
+
+    for( int i = 0; i < cols - 4; i++ )  // 5th row
+        m.setElement( 5, i, value++ );
+
+    m.setElement( 6, 0, value++ );   // 6th row
+
+    m.setElement( 7, 0, value++ );   // 7th row
+
+    for( int i = 0; i < cols - 1; i++ )  // 8th row
+        m.setElement( 8, i, value++ );
+
+    m.setElement( 9, 5, value++ );   // 9th row
+}
+
+template< typename Matrix >
+void checkUnevenRowSizeMatrix( Matrix& m )
+{
+   ASSERT_EQ( m.getRows(), 10 );
+   ASSERT_EQ( m.getColumns(), 6 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0);
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  3 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  5 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  9 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 11 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 12 );
+   EXPECT_EQ( m.getElement( 3, 5 ), 13 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 14 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 15 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 16 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 17 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 18 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 19 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 20 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 21 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 7, 0 ), 22 );
+   EXPECT_EQ( m.getElement( 7, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 8, 0 ), 23 );
+   EXPECT_EQ( m.getElement( 8, 1 ), 24 );
+   EXPECT_EQ( m.getElement( 8, 2 ), 25 );
+   EXPECT_EQ( m.getElement( 8, 3 ), 26 );
+   EXPECT_EQ( m.getElement( 8, 4 ), 27 );
+   EXPECT_EQ( m.getElement( 8, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 9, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 5 ), 28 );
+}
+
+/*
+ * Sets up the following 7x6 sparse matrix:
+ *
+ *    /              2  1 \
+ *    |           5  4  3 |
+ *    |        8  7  6    |
+ *    |    11 10  9       |
+ *    | 14 13 12          |
+ *    | 16 15             |
+ *    \ 17                /
+ */
+template< typename Matrix >
+void setupAntiTriDiagMatrix( Matrix& m )
+{
+    const int rows = 7;
+    const int cols = 6;
+    m.setDimensions( rows, cols );
+
+    int value = 1;
+    for( int i = 0; i < rows; i++ )
+        for( int j = cols - 1; j > 2; j-- )
+            if( j - i + 1 < cols && j - i + 1 >= 0 )
+                m.setElement( i, j - i + 1, value++ );
+}
+
+template< typename Matrix >
+void checkAntiTriDiagMatrix( Matrix& m )
+{
+   ASSERT_EQ( m.getRows(), 7 );
+   ASSERT_EQ( m.getColumns(), 6 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  1);
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  3 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 11 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  9 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 14 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 13 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 12 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 16 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 15 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 17 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+}
+
+/*
+ * Sets up the following 7x6 sparse matrix:
+ *
+ *    / 1  2             \
+ *    | 3  4  5          |
+ *    |    6  7  8       |
+ *    |       9 10 11    |
+ *    |         12 13 14 |
+ *    |            15 16 |
+ *    \               17 /
+ */
+template< typename Matrix >
+void setupTriDiagMatrix( Matrix& m )
+{
+   const int rows = 7;
+   const int cols = 6;
+   m.setDimensions( rows, cols );
+
+   int value = 1;
+   for( int i = 0; i < rows; i++ )
+      for( int j = 0; j < 3; j++ )
+         if( i + j - 1 >= 0 && i + j - 1 < cols )
+            m.setElement( i, i + j - 1, value++ );
+}
+
+template< typename Matrix >
+void checkTriDiagMatrix( Matrix& m )
+{
+   ASSERT_EQ( m.getRows(), 7 );
+   ASSERT_EQ( m.getColumns(), 6 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  3 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  9 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 11 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 12 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 13 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 14 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 15 );
+   EXPECT_EQ( m.getElement( 5, 5 ), 16 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 17 );
+}
+
+template< typename Matrix1, typename Matrix2 >
+void testCopyAssignment()
+{
+   {
+      SCOPED_TRACE("Tri Diagonal Matrix");
+
+      Matrix1 triDiag1;
+      setupTriDiagMatrix( triDiag1 );
+      checkTriDiagMatrix( triDiag1 );
+
+      Matrix2 triDiag2;
+      triDiag2 = triDiag1;
+      checkTriDiagMatrix( triDiag1 );
+      checkTriDiagMatrix( triDiag2 );
+   }
+   {
+      SCOPED_TRACE("Anti Tri Diagonal Matrix");
+      Matrix1 antiTriDiag1;
+      setupAntiTriDiagMatrix( antiTriDiag1 );
+      checkAntiTriDiagMatrix( antiTriDiag1 );
+
+      Matrix2 antiTriDiag2;
+      antiTriDiag2 = antiTriDiag1;
+      checkAntiTriDiagMatrix( antiTriDiag2 );
+   }
+   {
+      SCOPED_TRACE("Uneven Row Size Matrix");
+      Matrix1 unevenRowSize1;
+      setupUnevenRowSizeMatrix( unevenRowSize1 );
+      checkUnevenRowSizeMatrix( unevenRowSize1 );
+
+      Matrix2 unevenRowSize2;
+      unevenRowSize2 = unevenRowSize1;
+
+      checkUnevenRowSizeMatrix( unevenRowSize2 );
+   }
+}
+
+template< typename Matrix >
+void tridiagonalMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >;
+   using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   TridiagonalHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( columns, i + 2 ); j++ )
+         hostMatrix.setElement( i, j, i + j );
+
+   Matrix matrix;
+   matrix = hostMatrix;
+   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 };
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( abs( i - j ) > 1 )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+
+#ifdef HAVE_CUDA
+   TridiagonalCuda cudaMatrix( rows, columns );
+   cudaMatrix = hostMatrix;
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( abs( i - j ) > 1 )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+#endif
+}
+
+template< typename Matrix >
+void multidiagonalMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >;
+   using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+   using DiagonalsShiftsType = typename MultidiagonalHost::DiagonalsShiftsType;
+   DiagonalsShiftsType diagonals{ -4, -2, 0, 1, 3, 5 };
+
+   const IndexType rows( 10 ), columns( 10 );
+   MultidiagonalHost hostMatrix( rows, columns, diagonals );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+         if( diagonals.containsValue( j - i ) )
+            hostMatrix.setElement( i, j, i + j );
+
+   Matrix matrix;
+   matrix = hostMatrix;
+   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 3, 4, 5, 5, 6, 5, 5, 4, 4, 3 };
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( diagonals.containsValue( j - i ) )
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+      }
+
+#ifdef HAVE_CUDA
+   MultidiagonalCuda cudaMatrix( rows, columns, diagonals );
+   cudaMatrix = hostMatrix;
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( diagonals.containsValue( j - i ) )
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+      }
+#endif
+}
+
+template< typename Matrix >
+void denseMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >;
+   using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   DenseHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j <= i; j++ )
+         hostMatrix( i, j ) = i + j;
+
+   Matrix matrix;
+   matrix = hostMatrix;
+   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 0, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+
+#ifdef HAVE_CUDA
+   DenseCuda cudaMatrix( rows, columns );
+   cudaMatrix = hostMatrix;
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+#endif
+}
+
+TEST( DenseMatrixCopyTest, Dense_HostToDense_Host )
+{
+   testCopyAssignment< Dense_host,               Dense_host >();
+   testCopyAssignment< Dense_host_RowMajorOrder, Dense_host >();
+   testCopyAssignment< Dense_host,               Dense_host_RowMajorOrder >();
+   testCopyAssignment< Dense_host_RowMajorOrder, Dense_host_RowMajorOrder >();
+}
+
+#ifdef HAVE_CUDA
+TEST( DenseMatrixCopyTest, Dense_HostToDense_Cuda )
+{
+   testCopyAssignment< Dense_host,               Dense_cuda >();
+   testCopyAssignment< Dense_host_RowMajorOrder, Dense_cuda >();
+   testCopyAssignment< Dense_host,               Dense_cuda_RowMajorOrder >();
+   testCopyAssignment< Dense_host_RowMajorOrder, Dense_cuda_RowMajorOrder >();
+}
+
+TEST( DenseMatrixCopyTest, Dense_CudaToDense_Host )
+{
+   testCopyAssignment< Dense_cuda,               Dense_host >();
+   testCopyAssignment< Dense_cuda_RowMajorOrder, Dense_host >();
+   testCopyAssignment< Dense_cuda,               Dense_host_RowMajorOrder >();
+   testCopyAssignment< Dense_cuda_RowMajorOrder, Dense_host_RowMajorOrder >();
+}
+
+TEST( DenseMatrixCopyTest, Dense_CudaToDense_Cuda )
+{
+   testCopyAssignment< Dense_cuda,               Dense_cuda >();
+   testCopyAssignment< Dense_cuda_RowMajorOrder, Dense_cuda >();
+   testCopyAssignment< Dense_cuda,               Dense_cuda_RowMajorOrder >();
+   testCopyAssignment< Dense_cuda_RowMajorOrder, Dense_cuda_RowMajorOrder >();
+}
+#endif // HAVE_CUDA
+
+
+TEST( DenseMatrixCopyTest, CSR_HostToDense_Host )
+{
+   testCopyAssignment< CSR_host, Dense_host >();
+   testCopyAssignment< CSR_host, Dense_host_RowMajorOrder >();
+}
+
+#ifdef HAVE_CUDA
+TEST( DenseMatrixCopyTest, CSR_HostToDense_cuda )
+{
+   testCopyAssignment< CSR_host, Dense_cuda >();
+   testCopyAssignment< CSR_host, Dense_cuda_RowMajorOrder >();
+}
+
+TEST( DenseMatrixCopyTest, CSR_CudaToDense_host )
+{
+   testCopyAssignment< CSR_cuda, Dense_host >();
+   testCopyAssignment< CSR_cuda, Dense_host_RowMajorOrder >();
+}
+
+TEST( DenseMatrixCopyTest, CSR_CudaToDense_cuda )
+{
+   testCopyAssignment< CSR_cuda, Dense_cuda >();
+   testCopyAssignment< CSR_cuda, Dense_cuda_RowMajorOrder >();
+}
+#endif
+
+////
+// Tridiagonal matrix assignment test
+TEST( DenseMatrixCopyTest, TridiagonalMatrixAssignment_to_Dense_host )
+{
+   tridiagonalMatrixAssignment< Dense_host >();
+   tridiagonalMatrixAssignment< Dense_host_RowMajorOrder >();
+}
+
+#ifdef HAVE_CUDA
+TEST( DenseMatrixCopyTest, TridiagonalMatrixAssignment_to_Dense_cuda )
+{
+   tridiagonalMatrixAssignment< Dense_cuda >();
+   tridiagonalMatrixAssignment< Dense_cuda_RowMajorOrder >();
+}
+#endif // HAVE_CUDA
+
+////
+// Multidiagonal matrix assignment test
+TEST( DenseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Dense_host )
+{
+   multidiagonalMatrixAssignment< Dense_host >();
+   multidiagonalMatrixAssignment< Dense_host_RowMajorOrder >();
+}
+
+#ifdef HAVE_CUDA
+TEST( DenseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Dense_cuda )
+{
+   multidiagonalMatrixAssignment< Dense_cuda >();
+   multidiagonalMatrixAssignment< Dense_cuda_RowMajorOrder >();
+}
+#endif // HAVE_CUDA
+
+////
+// Dense matrix assignment test
+TEST( DenseMatrixCopyTest, DenseMatrixAssignment_to_Dense_host )
+{
+   denseMatrixAssignment< Dense_host >();
+   denseMatrixAssignment< Dense_host_RowMajorOrder >();
+}
+
+#ifdef HAVE_CUDA
+TEST( DenseMatrixCopyTest, DenseMatrixAssignment_to_Dense_cuda )
+{
+   denseMatrixAssignment< Dense_cuda >();
+   denseMatrixAssignment< Dense_cuda_RowMajorOrder >();
+}
+#endif // HAVE_CUDA
+
+#endif //HAVE_GTEST
+
+#include "../main.h"
-- 
GitLab


From bb234945233bf2f929440f62e3d9643c61ad8d1e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Tue, 21 Jan 2020 22:17:46 +0100
Subject: [PATCH 107/179] Implementing dense matrix assignment.

---
 src/TNL/Matrices/Dense.h                     |  27 ++-
 src/TNL/Matrices/Dense.hpp                   | 220 ++++++++++++-------
 src/TNL/Matrices/Multidiagonal.h             |   3 +
 src/TNL/Matrices/Multidiagonal.hpp           |  14 ++
 src/TNL/Matrices/MultidiagonalMatrixView.h   |   3 +
 src/TNL/Matrices/MultidiagonalMatrixView.hpp |  16 +-
 src/TNL/Matrices/Tridiagonal.h               |   3 +
 src/TNL/Matrices/Tridiagonal.hpp             |  16 +-
 src/TNL/Matrices/TridiagonalMatrixView.h     |   3 +
 src/TNL/Matrices/TridiagonalMatrixView.hpp   |  16 +-
 src/UnitTests/Matrices/DenseMatrixCopyTest.h |  52 +++--
 11 files changed, 276 insertions(+), 97 deletions(-)

diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index 2e71316e9..8c109ac1e 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -167,12 +167,31 @@ class Dense : public Matrix< Real, Device, Index >
                                 Vector2& x,
                                 const RealType& omega = 1.0 ) const;
 
-      // copy assignment
+      /**
+       * \brief Assignment operator for exactly the same type of the dense matrix.
+       * 
+       * @param matrix
+       * @return 
+       */
       Dense& operator=( const Dense& matrix );
 
-      // cross-device copy assignment
-      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAlocator_ >
-      Dense& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAlocator_ >& matrix );
+      /**
+       * \brief Assignment operator for other dense matrices.
+       * 
+       * @param matrix
+       * @return 
+       */
+      template< typename RHSReal, typename RHSDevice, typename RHSIndex,
+                 bool RHSRowMajorOrder, typename RHSRealAllocator >
+      Dense& operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >& matrix );
+
+      /**
+       * \brief Assignment operator for other (sparse) types of matrices.
+       * @param matrix
+       * @return 
+       */
+      template< typename RHSMatrix >
+      Dense& operator=( const RHSMatrix& matrix );
 
       template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ >
       bool operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const;
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 3d9ff102e..e1acfee67 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -118,7 +118,7 @@ void
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 setLike( const Matrix_& matrix )
 {
-   Matrix< Real, Device, Index, RealAllocator >::setLike( matrix );
+   this->setDimensions( matrix.getRows(), matrix.getColumns() );
 }
 
 template< typename Real,
@@ -896,39 +896,81 @@ operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& mat
 {
    setLike( matrix );
    this->values = matrix.values;
-   /*const IndexType bufferRowsCount( 128 );
-   const IndexType columns = this->getColumns();
-   const size_t bufferSize = bufferRowsCount * columns;
-   Containers::Vector< RealType, Device, IndexType, RealAllocatorType > sourceValuesBuffer( bufferSize );
-   Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize );
-   auto sourceValuesBuffer_view = sourceValuesBuffer.getView();
-   auto destinationValuesBuffer_view = destinationValuesBuffer.getView();
-
-   IndexType baseRow( 0 );
-   const IndexType rowsCount = this->getRows();
-   while( baseRow < rowsCount )
+   return *this;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename RHSReal, typename RHSDevice, typename RHSIndex,
+             bool RHSRowMajorOrder, typename RHSRealAllocator >
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >& matrix )
+{
+   using RHSMatrix = Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >;
+   using RHSIndexType = typename RHSMatrix::IndexType;
+   using RHSRealType = typename RHSMatrix::RealType;
+   using RHSDeviceType = typename RHSMatrix::DeviceType;
+
+   this->setLike( matrix );
+   if( RowMajorOrder == RHSRowMajorOrder )
    {
-      const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
+      this->values = matrix.values;
+      return *this;
+   }
 
-      ////
-      // Copy matrix elements into buffer
-      auto f1 = [=] __cuda_callable__ ( Index rowIdx, Index columnIdx, Index globalIdx, const Real& value ) mutable {
-         const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
-         sourceValuesBuffer_view[ bufferIdx ] = value;
-      };
-      matrix.forRows( baseRow, lastRow, f1 );
-      destinationValuesBuffer = sourceValuesBuffer;
-
-      ////
-      // Copy buffer to this matrix
-      auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable {
-         const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
-         value = destinationValuesBuffer_view[ bufferIdx ];
+   auto this_view = this->view;
+   if( std::is_same< DeviceType, RHSDeviceType >::value )
+   {
+      const auto segments_view = this->segments.getView();
+      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable {
+         this_view( rowIdx, columnIdx ) = value;
       };
-      this->forRows( baseRow, lastRow, f2 );
-      baseRow += bufferRowsCount;
+      matrix.forAllRows( f );
    }
-   return *this;*/
+   else
+   {
+      const IndexType maxRowLength = matrix.getColumns();
+      const IndexType bufferRowsCount( 128 );
+      const size_t bufferSize = bufferRowsCount * maxRowLength;
+      Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType > matrixValuesBuffer( bufferSize );
+      Containers::Vector< RealType, DeviceType, IndexType > thisValuesBuffer( bufferSize );
+      auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
+      auto thisValuesBuffer_view = thisValuesBuffer.getView();
+
+      IndexType baseRow( 0 );
+      const IndexType rowsCount = this->getRows();
+      while( baseRow < rowsCount )
+      {
+         const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
+
+         ////
+         // Copy matrix elements into buffer
+         auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable {
+            const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + columnIdx;
+            matrixValuesBuffer_view[ bufferIdx ] = value;
+         };
+         matrix.forRows( baseRow, lastRow, f1 );
+
+         ////
+         // Copy the source matrix buffer to this matrix buffer
+         thisValuesBuffer_view = matrixValuesBuffer_view;
+
+         ////
+         // Copy matrix elements from the buffer to the matrix.
+         auto this_view = this->view;
+         auto f2 = [=] __cuda_callable__ ( IndexType columnIdx, IndexType bufferRowIdx ) mutable {
+            IndexType bufferIdx = bufferRowIdx * maxRowLength + columnIdx;
+            this_view( baseRow + bufferRowIdx, columnIdx ) = thisValuesBuffer_view[ bufferIdx ];
+         };
+         Algorithms::ParallelFor2D< DeviceType >::exec( ( IndexType ) 0, ( IndexType ) 0, ( IndexType ) maxRowLength, ( IndexType ) min( bufferRowsCount, this->getRows() - baseRow ), f2 );
+         baseRow += bufferRowsCount;
+      }
+   }
+   return *this;
 }
 
 template< typename Real,
@@ -936,59 +978,85 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+   template< typename RHSMatrix >
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
-operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix )
+operator=( const RHSMatrix& matrix )
 {
-   this->setLike( matrix );
-   if( RowMajorOrder == RowMajorOrder_ )
-      this->values = matrix.getValues();
+   using RHSIndexType = typename RHSMatrix::IndexType;
+   using RHSRealType = typename RHSMatrix::RealType;
+   using RHSDeviceType = typename RHSMatrix::DeviceType;
+   using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType;
+
+   Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths;
+   matrix.getCompressedRowLengths( rowLengths );
+   this->setDimensions( matrix.getRows(), matrix.getColumns() );
+
+   // TODO: use getConstView when it works
+   const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView();
+   auto values_view = this->values.getView();
+   RHSIndexType padding_index = matrix.getPaddingIndex();
+   this->values = 0.0;
+
+   if( std::is_same< DeviceType, RHSDeviceType >::value )
+   {
+      const auto segments_view = this->segments.getView();
+      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable {
+         if( value != 0.0 && columnIdx != padding_index )
+            values_view[ segments_view.getGlobalIndex( rowIdx, columnIdx ) ] = value;
+      };
+      matrix.forAllRows( f );
+   }
    else
    {
-      if( std::is_same< DeviceType, Device_ >::value )
+      const IndexType maxRowLength = max( rowLengths );
+      const IndexType bufferRowsCount( 128 );
+      const size_t bufferSize = bufferRowsCount * maxRowLength;
+      Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize );
+      Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > matrixColumnsBuffer( bufferSize );
+      Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize );
+      Containers::Vector< IndexType, DeviceType, IndexType > thisColumnsBuffer( bufferSize );
+      auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
+      auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView();
+      auto thisValuesBuffer_view = thisValuesBuffer.getView();
+      auto thisColumnsBuffer_view = thisColumnsBuffer.getView();
+
+      IndexType baseRow( 0 );
+      const IndexType rowsCount = this->getRows();
+      while( baseRow < rowsCount )
       {
-         auto this_view = this->getView();
-         auto f = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value, bool& compute ) mutable {
-            this_view.getRow( rowIdx ).setElement( columnIdx, value );
+         const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
+         thisColumnsBuffer = padding_index;
+         matrixColumnsBuffer_view = padding_index;
+
+         ////
+         // Copy matrix elements into buffer
+         auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
+            if( columnIndex != padding_index )
+            {
+               const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
+               matrixColumnsBuffer_view[ bufferIdx ] = columnIndex;
+               matrixValuesBuffer_view[ bufferIdx ] = value;
+            }
          };
-         matrix.forAllRows( f );
-      }
-      else
-      {
-         const IndexType bufferRowsCount( 128 );
-         const IndexType columns = this->getColumns();
-         const size_t bufferSize = bufferRowsCount * columns;
-         Containers::Vector< RealType, Device_, IndexType, RealAllocator_ > sourceValuesBuffer( bufferSize );
-         Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize );
-         auto sourceValuesBuffer_view = sourceValuesBuffer.getView();
-         auto destinationValuesBuffer_view = destinationValuesBuffer.getView();
-
-         IndexType baseRow( 0 );
-         const IndexType rowsCount = this->getRows();
-         while( baseRow < rowsCount )
-         {
-            const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
-
-            ////
-            // Copy matrix elements into buffer
-            auto f1 = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value, bool& compute ) mutable {
-               const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
-               sourceValuesBuffer_view[ bufferIdx ] = value;
-            };
-            matrix.forRows( baseRow, lastRow, f1 );
-
-            destinationValuesBuffer = sourceValuesBuffer;
-
-            ////
-            // Copy buffer to this matrix
-            auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value, bool& compute ) mutable {
-               const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
-               value = destinationValuesBuffer_view[ bufferIdx ];
-            };
-            this->forRows( baseRow, lastRow, f2 );
-            baseRow += bufferRowsCount;
-         }
+         matrix.forRows( baseRow, lastRow, f1 );
+
+         ////
+         // Copy the source matrix buffer to this matrix buffer
+         thisValuesBuffer_view = matrixValuesBuffer_view;
+         thisColumnsBuffer_view = matrixColumnsBuffer_view;
+
+         ////
+         // Copy matrix elements from the buffer to the matrix
+         auto this_view = this->view;
+         auto f2 = [=] __cuda_callable__ ( IndexType bufferColumnIdx, IndexType bufferRowIdx ) mutable {
+            IndexType bufferIdx = bufferRowIdx * maxRowLength + bufferColumnIdx;
+            IndexType columnIdx = thisColumnsBuffer_view[ bufferIdx ];
+            if( columnIdx != padding_index )
+               this_view( baseRow + bufferRowIdx, columnIdx ) = thisValuesBuffer_view[ bufferIdx ];
+         };
+         Algorithms::ParallelFor2D< DeviceType >::exec( ( IndexType ) 0, ( IndexType ) 0, ( IndexType ) maxRowLength, ( IndexType ) min( bufferRowsCount, this->getRows() - baseRow ), f2 );
+         baseRow += bufferRowsCount;
       }
    }
    this->view = this->getView();
diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h
index 9e5f92295..927e52449 100644
--- a/src/TNL/Matrices/Multidiagonal.h
+++ b/src/TNL/Matrices/Multidiagonal.h
@@ -201,6 +201,9 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator >
 
       IndexerType& getIndexer();
 
+      __cuda_callable__
+      IndexType getPaddingIndex() const;
+
    protected:
 
       __cuda_callable__
diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp
index 94470d3d1..5d83004f2 100644
--- a/src/TNL/Matrices/Multidiagonal.hpp
+++ b/src/TNL/Matrices/Multidiagonal.hpp
@@ -830,6 +830,20 @@ getElementIndex( const IndexType row, const IndexType column ) const
    return this->indexer.getGlobalIndex( row, localIdx );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+__cuda_callable__
+Index
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getPaddingIndex() const
+{
+   return this->view.getPaddingIndex();
+}
+
 /*
 template<>
 class MultidiagonalDeviceDependentCode< Devices::Host >
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.h b/src/TNL/Matrices/MultidiagonalMatrixView.h
index 1e5a9bd28..f623a3ca6 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixView.h
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.h
@@ -163,6 +163,9 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
       __cuda_callable__
       IndexerType& getIndexer();
 
+      __cuda_callable__
+      IndexType getPaddingIndex() const;
+
    protected:
 
       __cuda_callable__
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
index 224368465..f35c6d713 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
@@ -713,11 +713,25 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder >
 __cuda_callable__
-Index MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+Index
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 getElementIndex( const IndexType row, const IndexType localIdx ) const
 {
    return this->indexer.getGlobalIndex( row, localIdx );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+Index
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getPaddingIndex() const
+{
+   return -1;
+}
+
+
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h
index 82549e744..3f8902310 100644
--- a/src/TNL/Matrices/Tridiagonal.h
+++ b/src/TNL/Matrices/Tridiagonal.h
@@ -174,6 +174,9 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
 
       IndexerType& getIndexer();
 
+      __cuda_callable__
+      IndexType getPaddingIndex() const;
+
    protected:
 
       __cuda_callable__
diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp
index 8f4f4e190..d99715a47 100644
--- a/src/TNL/Matrices/Tridiagonal.hpp
+++ b/src/TNL/Matrices/Tridiagonal.hpp
@@ -678,7 +678,8 @@ template< typename Real,
           bool RowMajorOrder,
           typename RealAllocator >
 __cuda_callable__
-Index Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+Index
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getElementIndex( const IndexType row, const IndexType column ) const
 {
    IndexType localIdx = column - row;
@@ -691,6 +692,19 @@ getElementIndex( const IndexType row, const IndexType column ) const
    return this->indexer.getGlobalIndex( row, localIdx );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__
+Index
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getPaddingIndex() const
+{
+   return this->view.getPaddingIndex();
+}
+
 /*
 template<>
 class TridiagonalDeviceDependentCode< Devices::Host >
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h
index 61b005c5a..82b76c73f 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.h
+++ b/src/TNL/Matrices/TridiagonalMatrixView.h
@@ -151,6 +151,9 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
       __cuda_callable__
       IndexerType& getIndexer();
 
+      __cuda_callable__
+      IndexType getPaddingIndex() const;
+
    protected:
 
       __cuda_callable__
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp
index 7fc5fd6b7..6e293ffd0 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp
@@ -675,7 +675,8 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder >
 __cuda_callable__
-Index TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+Index
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 getElementIndex( const IndexType row, const IndexType column ) const
 {
    IndexType localIdx = column - row;
@@ -688,5 +689,18 @@ getElementIndex( const IndexType row, const IndexType column ) const
    return this->indexer.getGlobalIndex( row, localIdx );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+Index
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getPaddingIndex() const
+{
+   return -1;
+}
+
+
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.h b/src/UnitTests/Matrices/DenseMatrixCopyTest.h
index ef7809a6b..3ef31f107 100644
--- a/src/UnitTests/Matrices/DenseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.h
@@ -59,9 +59,20 @@ using Dense_cuda_RowMajorOrder = TNL::Matrices::Dense< int, TNL::Devices::Cuda,
 template< typename Matrix >
 void setupUnevenRowSizeMatrix( Matrix& m )
 {
-    const int rows = 10;
-    const int cols = 6;
-    m.setDimensions( rows, cols );
+   const int rows = 10;
+   const int cols = 6;
+   m.setDimensions( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setValue( 5 );
+   rowLengths.setElement( 0, 2 );
+   rowLengths.setElement( 1,  3 );
+   rowLengths.setElement( 2,  3 );
+   rowLengths.setElement( 5,  2 );
+   rowLengths.setElement( 6,  1 );
+   rowLengths.setElement( 7,  1 );
+   rowLengths.setElement( 9,  1 );
+   m.setCompressedRowLengths( rowLengths );
 
     int value = 1;
     for( int i = 0; i < cols - 4; i++ )  // 0th row
@@ -183,15 +194,21 @@ void checkUnevenRowSizeMatrix( Matrix& m )
 template< typename Matrix >
 void setupAntiTriDiagMatrix( Matrix& m )
 {
-    const int rows = 7;
-    const int cols = 6;
-    m.setDimensions( rows, cols );
+   const int rows = 7;
+   const int cols = 6;
+   m.setDimensions( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setValue( 3 );
+   rowLengths.setElement( 0, 4);
+   rowLengths.setElement( 1,  4 );
+   m.setCompressedRowLengths( rowLengths );
 
-    int value = 1;
-    for( int i = 0; i < rows; i++ )
-        for( int j = cols - 1; j > 2; j-- )
-            if( j - i + 1 < cols && j - i + 1 >= 0 )
-                m.setElement( i, j - i + 1, value++ );
+   int value = 1;
+   for( int i = 0; i < rows; i++ )
+      for( int j = cols - 1; j > 2; j-- )
+         if( j - i + 1 < cols && j - i + 1 >= 0 )
+            m.setElement( i, j - i + 1, value++ );
 }
 
 template< typename Matrix >
@@ -267,6 +284,13 @@ void setupTriDiagMatrix( Matrix& m )
    const int rows = 7;
    const int cols = 6;
    m.setDimensions( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setValue( 3 );
+   rowLengths.setElement( 0 , 4 );
+   rowLengths.setElement( 1,  4 );
+   m.setCompressedRowLengths( rowLengths );
+
 
    int value = 1;
    for( int i = 0; i < rows; i++ )
@@ -387,7 +411,7 @@ void tridiagonalMatrixAssignment()
 
    Matrix matrix;
    matrix = hostMatrix;
-   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   using RowCapacitiesType = TNL::Containers::Vector< IndexType, DeviceType, IndexType >;
    RowCapacitiesType rowCapacities;
    matrix.getCompressedRowLengths( rowCapacities );
    RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 };
@@ -439,7 +463,7 @@ void multidiagonalMatrixAssignment()
 
    Matrix matrix;
    matrix = hostMatrix;
-   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   using RowCapacitiesType = TNL::Containers::Vector< IndexType, DeviceType, IndexType >;
    RowCapacitiesType rowCapacities;
    matrix.getCompressedRowLengths( rowCapacities );
    RowCapacitiesType exactRowLengths{ 3, 4, 5, 5, 6, 5, 5, 4, 4, 3 };
@@ -488,7 +512,7 @@ void denseMatrixAssignment()
 
    Matrix matrix;
    matrix = hostMatrix;
-   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   using RowCapacitiesType = TNL::Containers::Vector< IndexType, DeviceType, IndexType >;
    RowCapacitiesType rowCapacities;
    matrix.getCompressedRowLengths( rowCapacities );
    RowCapacitiesType exactRowLengths{ 0, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
-- 
GitLab


From 78c47bd249e66866baf369915fb4de94319bedf8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Tue, 21 Jan 2020 22:18:45 +0100
Subject: [PATCH 108/179] Debugging sparse matrix assignment in CUDA.

---
 src/TNL/Containers/Segments/CSRView.hpp       |  1 +
 src/TNL/Containers/Segments/SegmentView.h     | 10 +++++++++-
 src/TNL/Matrices/SparseMatrix.hpp             | 16 +++++++++-------
 src/UnitTests/Matrices/SparseMatrixCopyTest.h |  1 -
 4 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
index cc4d16fe6..d8ea9b06e 100644
--- a/src/TNL/Containers/Segments/CSRView.hpp
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -156,6 +156,7 @@ auto
 CSRView< Device, Index >::
 getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 {
+   printf( "----> size %d \n", offsets[ segmentIdx + 1 ] );
    return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ], 1 );
 }
 
diff --git a/src/TNL/Containers/Segments/SegmentView.h b/src/TNL/Containers/Segments/SegmentView.h
index 29f2e7781..edfe31df4 100644
--- a/src/TNL/Containers/Segments/SegmentView.h
+++ b/src/TNL/Containers/Segments/SegmentView.h
@@ -29,8 +29,16 @@ class SegmentView< Index, false >
       SegmentView( const IndexType offset,
                    const IndexType size,
                    const IndexType step )
-      : segmentOffset( offset ), segmentSize( size ), step( step ){};
+      : segmentOffset( offset ), segmentSize( size ), step( step )
+      {
+         printf( "--- size = %d \n", size );
+      };
 
+      __cuda_callable__
+      SegmentView( const SegmentView& view )
+      : segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ), step( view.step )
+      {
+      };
       __cuda_callable__
       IndexType getSize() const
       {
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 66813a732..447d8d250 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -673,7 +673,6 @@ operator=( const RHSMatrix& matrix )
    using RHSRealType = typename RHSMatrix::RealType;
    using RHSDeviceType = typename RHSMatrix::DeviceType;
    using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType;
-   using RHSIndexAllocatorType = typename Allocators::Default< RHSDeviceType >::template Allocator< RHSIndexType >;
 
    Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths;
    matrix.getCompressedRowLengths( rowLengths );
@@ -712,9 +711,9 @@ operator=( const RHSMatrix& matrix )
       const IndexType bufferRowsCount( 128 );
       const size_t bufferSize = bufferRowsCount * maxRowLength;
       Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize );
-      Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType, RHSIndexAllocatorType > matrixColumnsBuffer( bufferSize );
+      Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > matrixColumnsBuffer( bufferSize );
       Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize );
-      Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize );
+      Containers::Vector< IndexType, DeviceType, IndexType > thisColumnsBuffer( bufferSize );
       auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
       auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView();
       auto thisValuesBuffer_view = thisValuesBuffer.getView();
@@ -754,12 +753,15 @@ operator=( const RHSMatrix& matrix )
             RealType inValue( 0.0 );
             IndexType bufferIdx, localIdx( rowLocalIndexes_view[ rowIdx ] );
             auto matrixRow = matrix_view.getRow( rowIdx );
-            while( inValue == 0.0 && localIdx < matrixRow.getSize() ) //matrix_columns )
+            IndexType s = matrixRow.getSize();
+            //printf( " row %d size %d \n", rowIdx, s );
+            while( inValue == 0.0 && localIdx < 0 )
             {
                bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx++;
-               inValue = thisValuesBuffer_view[ bufferIdx ];
+               TNL_ASSERT_LT( bufferIdx, bufferSize, "" );
+               //inValue = thisValuesBuffer_view[ bufferIdx ];
             }
-            rowLocalIndexes_view[ rowIdx ] = localIdx;
+            /*rowLocalIndexes_view[ rowIdx ] = localIdx;
             if( inValue == 0.0 )
             {
                columnIndex = paddingIndex;
@@ -769,7 +771,7 @@ operator=( const RHSMatrix& matrix )
             {
                columnIndex = thisColumnsBuffer_view[ bufferIdx ];//column - 1;
                value = inValue;
-            }
+            }*/
          };
          this->forRows( baseRow, lastRow, f2 );
          baseRow += bufferRowsCount;
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index 6e1427ad1..46777f6c0 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -64,7 +64,6 @@ void setupUnevenRowSizeMatrix( Matrix& m )
 {
     const int rows = 10;
     const int cols = 6;
-    m.reset();
     m.setDimensions( rows, cols );
     typename Matrix::CompressedRowLengthsVector rowLengths;
     rowLengths.setSize( rows );
-- 
GitLab


From 5be2891b58b375441e8785b281b4a603e25852bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Wed, 22 Jan 2020 21:02:00 +0100
Subject: [PATCH 109/179] Fixing segments.

---
 src/TNL/Containers/Segments/CSR.hpp                |  2 +-
 src/TNL/Containers/Segments/CSRView.hpp            |  4 ++--
 src/TNL/Containers/Segments/SegmentView.h          | 10 +++-------
 src/UnitTests/Containers/Segments/SegmentsTest.hpp | 12 ++++++------
 4 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index 3581748fa..9a05d84f7 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -207,7 +207,7 @@ void
 CSR< Device, Index, IndexAllocator >::
 forAll( Function& f, Args... args ) const
 {
-   this->forSegments( 0, this->getSize(), f, args... );
+   this->forSegments( 0, this->getSegmentsCount(), f, args... );
 }
 
 template< typename Device,
diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
index d8ea9b06e..d6ec55b6a 100644
--- a/src/TNL/Containers/Segments/CSRView.hpp
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -156,7 +156,7 @@ auto
 CSRView< Device, Index >::
 getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 {
-   printf( "----> size %d \n", offsets[ segmentIdx + 1 ] );
+   printf( "----> segmentIdx %d offset %d size %d ptr %p \n",  segmentIdx, offsets[ segmentIdx ], offsets.getSize(), offsets.getData() );
    return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ], 1 );
 }
 
@@ -186,7 +186,7 @@ void
 CSRView< Device, Index >::
 forAll( Function& f, Args... args ) const
 {
-   this->forSegments( 0, this->getSize(), f, args... );
+   this->forSegments( 0, this->getSegmentsCount(), f, args... );
 }
 
 template< typename Device,
diff --git a/src/TNL/Containers/Segments/SegmentView.h b/src/TNL/Containers/Segments/SegmentView.h
index edfe31df4..eeb3f9d24 100644
--- a/src/TNL/Containers/Segments/SegmentView.h
+++ b/src/TNL/Containers/Segments/SegmentView.h
@@ -29,16 +29,12 @@ class SegmentView< Index, false >
       SegmentView( const IndexType offset,
                    const IndexType size,
                    const IndexType step )
-      : segmentOffset( offset ), segmentSize( size ), step( step )
-      {
-         printf( "--- size = %d \n", size );
-      };
+      : segmentOffset( offset ), segmentSize( size ), step( step ){};
 
       __cuda_callable__
       SegmentView( const SegmentView& view )
-      : segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ), step( view.step )
-      {
-      };
+      : segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ), step( view.step ){};
+
       __cuda_callable__
       IndexType getSize() const
       {
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
index 8320fafe5..590b39881 100644
--- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp
+++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
@@ -127,17 +127,17 @@ void test_AllReduction_MaximumInSegments()
 
    TNL::Containers::Vector< IndexType, DeviceType, IndexType > v( segments.getStorageSize() );
 
-   IndexType k( 1 );
+   /*IndexType k( 1 );
    for( IndexType i = 0; i < segmentsCount; i++ )
       for( IndexType j = 0; j < segmentSize; j++ )
-         v.setElement( segments.getGlobalIndex( i, j ), k++ );
-   /*auto view = v.getView();
-   auto init = [=] __cuda_callable__ ( const IndexType i, const IndexType j ) mutable -> bool {
-      view[ j ] =  j + 1;
+         v.setElement( segments.getGlobalIndex( i, j ), k++ );*/
+   auto view = v.getView();
+   auto init = [=] __cuda_callable__ ( const IndexType segmentIdx, const IndexType localIdx, const IndexType globalIdx ) mutable -> bool {
+      view[ globalIdx ] =  segmentIdx * 5 + localIdx + 1;
       return true;
    };
    segments.forAll( init );
-   std::cerr << v << std::endl;*/
+   std::cerr << v << std::endl;
 
    TNL::Containers::Vector< IndexType, DeviceType, IndexType >result( segmentsCount );
 
-- 
GitLab


From 64c2d435eeacb92fb39066e0273d7d66082f26cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 23 Jan 2020 21:32:56 +0100
Subject: [PATCH 110/179] Fixed sparse matrix assignment.

---
 src/TNL/Containers/Segments/CSR.hpp           |  1 +
 src/TNL/Containers/Segments/CSRView.h         |  2 ++
 src/TNL/Containers/Segments/CSRView.hpp       |  6 ++--
 src/TNL/Containers/Segments/Ellpack.hpp       |  3 +-
 src/TNL/Containers/Segments/EllpackView.h     |  6 ++--
 src/TNL/Containers/Segments/EllpackView.hpp   |  9 +++--
 src/TNL/Containers/Segments/SlicedEllpack.hpp |  1 +
 .../Containers/Segments/SlicedEllpackView.h   |  2 ++
 .../Containers/Segments/SlicedEllpackView.hpp |  3 ++
 src/TNL/Matrices/Dense.hpp                    |  1 -
 src/TNL/Matrices/DenseMatrixView.hpp          |  1 +
 src/TNL/Matrices/MatrixView.hpp               |  1 +
 src/TNL/Matrices/Multidiagonal.h              |  2 ++
 src/TNL/Matrices/Multidiagonal.hpp            |  3 +-
 src/TNL/Matrices/MultidiagonalMatrixView.h    |  2 ++
 src/TNL/Matrices/MultidiagonalMatrixView.hpp  |  1 +
 src/TNL/Matrices/SparseMatrix.h               |  2 +-
 src/TNL/Matrices/SparseMatrix.hpp             | 34 ++++++++++---------
 src/TNL/Matrices/SparseMatrixView.hpp         |  3 +-
 src/TNL/Matrices/Tridiagonal.h                |  2 ++
 src/TNL/Matrices/Tridiagonal.hpp              |  1 +
 .../Containers/Segments/SegmentsTest.hpp      |  5 ---
 src/UnitTests/Matrices/SparseMatrixCopyTest.h |  5 +++
 23 files changed, 62 insertions(+), 34 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index 9a05d84f7..55dcba74c 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -253,6 +253,7 @@ CSR< Device, Index, IndexAllocator >::
 operator=( const CSR< Device_, Index_, IndexAllocator_ >& source )
 {
    this->offsets = source.offsets;
+   return *this;
 }
 
 template< typename Device,
diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h
index 2ad849f97..759fe8ff7 100644
--- a/src/TNL/Containers/Segments/CSRView.h
+++ b/src/TNL/Containers/Segments/CSRView.h
@@ -52,8 +52,10 @@ class CSRView
 
       static String getSerializationType();
 
+      __cuda_callable__
       ViewType getView();
 
+      __cuda_callable__
       ConstViewType getConstView() const;
 
       /**
diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
index d6ec55b6a..043e06e04 100644
--- a/src/TNL/Containers/Segments/CSRView.hpp
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -66,6 +66,7 @@ getSerializationType()
 
 template< typename Device,
           typename Index >
+__cuda_callable__
 typename CSRView< Device, Index >::ViewType
 CSRView< Device, Index >::
 getView()
@@ -75,6 +76,7 @@ getView()
 
 template< typename Device,
           typename Index >
+__cuda_callable__
 typename CSRView< Device, Index >::ConstViewType
 CSRView< Device, Index >::
 getConstView() const
@@ -156,7 +158,6 @@ auto
 CSRView< Device, Index >::
 getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 {
-   printf( "----> segmentIdx %d offset %d size %d ptr %p \n",  segmentIdx, offsets[ segmentIdx ], offsets.getSize(), offsets.getData() );
    return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ], 1 );
 }
 
@@ -167,7 +168,7 @@ void
 CSRView< Device, Index >::
 forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 {
-   const auto offsetsView = this->offsets.getConstView();
+   const auto offsetsView = this->offsets;
    auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
       const IndexType begin = offsetsView[ segmentIdx ];
       const IndexType end = offsetsView[ segmentIdx + 1 ];
@@ -228,6 +229,7 @@ CSRView< Device, Index >::
 operator=( const CSRView& view )
 {
    this->offsets.copy( view.offsets );
+   return *this;
 }
 
 template< typename Device,
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index 8763c2e5d..663a65bc8 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -293,7 +293,7 @@ void
 Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 forAll( Function& f, Args... args ) const
 {
-   this->forSegments( 0, this->getSize(), f, args... );
+   this->forSegments( 0, this->getSegmentsCount(), f, args... );
 }
 
 template< typename Device,
@@ -364,6 +364,7 @@ operator=( const Ellpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, Alig
    this->segmentSize = source.segmentSize;
    this->size = source.size;
    this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment();
+   return *this;
 }
 
 template< typename Device,
diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h
index f64b04068..3870f0802 100644
--- a/src/TNL/Containers/Segments/EllpackView.h
+++ b/src/TNL/Containers/Segments/EllpackView.h
@@ -37,7 +37,7 @@ class EllpackView
       template< typename Device_, typename Index_ >
       using ViewTemplate = EllpackView< Device_, Index_ >;
       using ViewType = EllpackView;
-      //using ConstViewType = EllpackView< Device, std::add_const_t< Index > >;
+      using ConstViewType = EllpackView< Device, std::add_const_t< Index > >;
       using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
 
       __cuda_callable__
@@ -54,9 +54,11 @@ class EllpackView
 
       static String getSerializationType();
 
+      __cuda_callable__
       ViewType getView();
 
-      //ConstViewType getConstView() const;
+      __cuda_callable__
+      ConstViewType getConstView() const;
 
       /**
        * \brief Number segments.
diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp
index c0d0b3721..ea2dc0d21 100644
--- a/src/TNL/Containers/Segments/EllpackView.hpp
+++ b/src/TNL/Containers/Segments/EllpackView.hpp
@@ -78,6 +78,7 @@ template< typename Device,
           typename Index,
           bool RowMajorOrder,
           int Alignment >
+__cuda_callable__
 typename EllpackView< Device, Index, RowMajorOrder, Alignment >::ViewType
 EllpackView< Device, Index, RowMajorOrder, Alignment >::
 getView()
@@ -85,16 +86,17 @@ getView()
    return ViewType( segmentSize, size, alignedSize );
 }
 
-/*template< typename Device,
+template< typename Device,
           typename Index,
           bool RowMajorOrder,
           int Alignment >
+__cuda_callable__
 typename EllpackView< Device, Index, RowMajorOrder, Alignment >::ConstViewType
 EllpackView< Device, Index, RowMajorOrder, Alignment >::
 getConstView() const
 {
    return ConstViewType( segmentSize, size, alignedSize );
-}*/
+}
 
 template< typename Device,
           typename Index,
@@ -233,7 +235,7 @@ void
 EllpackView< Device, Index, RowMajorOrder, Alignment >::
 forAll( Function& f, Args... args ) const
 {
-   this->forSegments( 0, this->getSize(), f, args... );
+   this->forSegments( 0, this->getSegmentsCount(), f, args... );
 }
 
 template< typename Device,
@@ -302,6 +304,7 @@ operator=( const EllpackView< Device, Index, RowMajorOrder, Alignment >& view )
    this->segmentSize = view.segmentSize;
    this->size = view.size;
    this->alignedSize = view.alignedSize;
+   return *this;
 }
 
 template< typename Device,
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index 62e2ca7d5..3d3a6d8c3 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -408,6 +408,7 @@ operator=( const SlicedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_
    this->segmentsCount = source.segmentsCount;
    this->sliceOffsets = source.sliceOffsets;
    this->sliceSegmentSizes = source.sliceSegmentSizes;
+   return *this;
 }
 
 template< typename Device,
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h
index c8c73c3f2..2b310a805 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.h
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.h
@@ -56,8 +56,10 @@ class SlicedEllpackView
 
       static String getSerializationType();
 
+      __cuda_callable__
       ViewType getView();
 
+      __cuda_callable__
       ConstViewType getConstView() const;
 
       __cuda_callable__
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
index 98a3d9b81..3e3c8c09c 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
@@ -89,6 +89,7 @@ template< typename Device,
           typename Index,
           bool RowMajorOrder,
           int SliceSize >
+__cuda_callable__
 typename SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::ViewType
 SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
 getView()
@@ -100,6 +101,7 @@ template< typename Device,
           typename Index,
           bool RowMajorOrder,
           int SliceSize >
+__cuda_callable__
 typename SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::ConstViewType
 SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
 getConstView() const
@@ -357,6 +359,7 @@ operator=( const SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >& v
    this->segmentsCount = view.segmentsCount;
    this->sliceOffsets.copy( view.sliceOffsets );
    this->sliceSegmentSizes.copy( view.sliceSegmentSizes );
+   return *this;
 }
 
 template< typename Device,
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index e1acfee67..91a98e7f9 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -925,7 +925,6 @@ operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealA
    auto this_view = this->view;
    if( std::is_same< DeviceType, RHSDeviceType >::value )
    {
-      const auto segments_view = this->segments.getView();
       auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable {
          this_view( rowIdx, columnIdx ) = value;
       };
diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp
index 890606436..a11ff263c 100644
--- a/src/TNL/Matrices/DenseMatrixView.hpp
+++ b/src/TNL/Matrices/DenseMatrixView.hpp
@@ -901,6 +901,7 @@ operator=( const DenseMatrixView& matrix )
 {
    MatrixView< Real, Device, Index >::operator=( matrix );
    this->segments = matrix.segments;
+   return *this;
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp
index 275a22870..360478d05 100644
--- a/src/TNL/Matrices/MatrixView.hpp
+++ b/src/TNL/Matrices/MatrixView.hpp
@@ -131,6 +131,7 @@ operator=( const MatrixView& view )
    rows = view.rows;
    columns = view.columns;
    values.copy( view.values );
+   return *this;
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h
index 927e52449..749ddfae7 100644
--- a/src/TNL/Matrices/Multidiagonal.h
+++ b/src/TNL/Matrices/Multidiagonal.h
@@ -113,8 +113,10 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator >
       template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
       bool operator != ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const;
 
+      __cuda_callable__
       RowView getRow( const IndexType& rowIdx );
 
+      __cuda_callable__
       const RowView getRow( const IndexType& rowIdx ) const;
 
       void setValue( const RealType& v );
diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp
index 5d83004f2..659d6d4eb 100644
--- a/src/TNL/Matrices/Multidiagonal.hpp
+++ b/src/TNL/Matrices/Multidiagonal.hpp
@@ -683,9 +683,7 @@ operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllo
          Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize );
          Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize );
          auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
-         auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView();
          auto thisValuesBuffer_view = thisValuesBuffer.getView();
-         auto thisColumnsBuffer_view = thisColumnsBuffer.getView();
 
          IndexType baseRow( 0 );
          const IndexType rowsCount = this->getRows();
@@ -716,6 +714,7 @@ operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllo
          }
       }
    }
+   return *this;
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.h b/src/TNL/Matrices/MultidiagonalMatrixView.h
index f623a3ca6..97ff94f85 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixView.h
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.h
@@ -87,8 +87,10 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
       template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
       bool operator != ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;
 
+      __cuda_callable__
       RowView getRow( const IndexType& rowIdx );
 
+      __cuda_callable__
       const RowView getRow( const IndexType& rowIdx ) const;
 
       void setValue( const RealType& v );
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
index f35c6d713..33010cebc 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
@@ -338,6 +338,7 @@ operator=( const MultidiagonalMatrixView& view )
    this->diagonalsShifts.copy( view.diagonalsShifts );
    this->hostDiagonalsShifts.copy( view.hostDiagonalsShifts );
    this->indexer = view.indexer;
+   return *this;
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 26d5d2d84..9f91ee7d1 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -93,7 +93,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       void getCompressedRowLengths( Vector& rowLengths ) const;
 
       [[deprecated]]
-      virtual IndexType getRowLength( const IndexType row ) const {};
+      virtual IndexType getRowLength( const IndexType row ) const { return 0;};
 
       template< typename Matrix >
       void setLike( const Matrix& matrix );
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 447d8d250..cf4472922 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -210,7 +210,7 @@ Index
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getNumberOfNonzeroMatrixElements() const
 {
-   this->view.getNumberOfNonzeroMatrixElements();
+   return this->view.getNumberOfNonzeroMatrixElements();
 }
 
 template< typename Real,
@@ -602,7 +602,6 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >&
       Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize );
       auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
       auto thisValuesBuffer_view = thisValuesBuffer.getView();
-      auto thisColumnsBuffer_view = thisColumnsBuffer.getView();
 
       IndexType baseRow( 0 );
       const IndexType rowsCount = this->getRows();
@@ -689,11 +688,10 @@ operator=( const RHSMatrix& matrix )
    auto rowLocalIndexes_view = rowLocalIndexes.getView();
    columns_view = paddingIndex;
 
-   if( std::is_same< DeviceType, RHSDeviceType >::value )
+   /*if( std::is_same< DeviceType, RHSDeviceType >::value )
    {
       const auto segments_view = this->segments.getView();
       auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
-         RealType inValue( 0.0 );
          IndexType localIdx( rowLocalIndexes_view[ rowIdx ] );
          if( value != 0.0 && columnIndex != paddingIndex )
          {
@@ -705,7 +703,7 @@ operator=( const RHSMatrix& matrix )
       };
       matrix.forAllRows( f );
    }
-   else
+   else*/
    {
       const IndexType maxRowLength = max( rowLengths );
       const IndexType bufferRowsCount( 128 );
@@ -714,10 +712,13 @@ operator=( const RHSMatrix& matrix )
       Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > matrixColumnsBuffer( bufferSize );
       Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize );
       Containers::Vector< IndexType, DeviceType, IndexType > thisColumnsBuffer( bufferSize );
+      Containers::Vector< IndexType, DeviceType, IndexType > thisRowLengths;
+      thisRowLengths = rowLengths;
       auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
       auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView();
       auto thisValuesBuffer_view = thisValuesBuffer.getView();
       auto thisColumnsBuffer_view = thisColumnsBuffer.getView();
+      matrixValuesBuffer_view = 0.0;
 
       IndexType baseRow( 0 );
       const IndexType rowsCount = this->getRows();
@@ -735,6 +736,7 @@ operator=( const RHSMatrix& matrix )
                const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
                matrixColumnsBuffer_view[ bufferIdx ] = columnIndex;
                matrixValuesBuffer_view[ bufferIdx ] = value;
+               //std::cerr << " <<<<< rowIdx = " << rowIdx << " localIdx = " << localIdx << " value = " << value << " bufferIdx = " << bufferIdx << std::endl;
             }
          };
          matrix.forRows( baseRow, lastRow, f1 );
@@ -748,20 +750,20 @@ operator=( const RHSMatrix& matrix )
          // Copy matrix elements from the buffer to the matrix and ignoring
          // zero matrix elements
          const IndexType matrix_columns = this->getColumns();
-         auto matrix_view = matrix.getView();
-         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx_, IndexType& columnIndex, RealType& value, bool& compute ) mutable {
+         const auto thisRowLengths_view = thisRowLengths.getConstView();
+         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable {
             RealType inValue( 0.0 );
-            IndexType bufferIdx, localIdx( rowLocalIndexes_view[ rowIdx ] );
-            auto matrixRow = matrix_view.getRow( rowIdx );
-            IndexType s = matrixRow.getSize();
-            //printf( " row %d size %d \n", rowIdx, s );
-            while( inValue == 0.0 && localIdx < 0 )
+            size_t bufferIdx;
+            IndexType bufferLocalIdx( rowLocalIndexes_view[ rowIdx ] );
+            while( inValue == 0.0 && localIdx < thisRowLengths_view[ rowIdx ] )
             {
-               bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx++;
+               bufferIdx = ( rowIdx - baseRow ) * maxRowLength + bufferLocalIdx++;
                TNL_ASSERT_LT( bufferIdx, bufferSize, "" );
-               //inValue = thisValuesBuffer_view[ bufferIdx ];
+               inValue = thisValuesBuffer_view[ bufferIdx ];
             }
-            /*rowLocalIndexes_view[ rowIdx ] = localIdx;
+            //std::cerr << "rowIdx = " << rowIdx << " localIdx = " << localIdx << " bufferLocalIdx = " << bufferLocalIdx 
+            //          << " inValue = " << inValue << " bufferIdx = " << bufferIdx << std::endl;
+            rowLocalIndexes_view[ rowIdx ] = bufferLocalIdx;
             if( inValue == 0.0 )
             {
                columnIndex = paddingIndex;
@@ -771,7 +773,7 @@ operator=( const RHSMatrix& matrix )
             {
                columnIndex = thisColumnsBuffer_view[ bufferIdx ];//column - 1;
                value = inValue;
-            }*/
+            }
          };
          this->forRows( baseRow, lastRow, f2 );
          baseRow += bufferRowsCount;
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index 965a51b8b..4ac0a29b8 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -139,7 +139,7 @@ Index
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
 getRowLength( const IndexType row ) const
 {
-
+   return 0;
 }
 
 template< typename Real,
@@ -525,6 +525,7 @@ operator=( const SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView
    MatrixView< Real, Device, Index >::operator=( matrix );
    this->columnIndexes.copy( matrix.columnIndexes );
    this->segments = matrix.segments;
+   return *this;
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h
index 3f8902310..6f0c6a548 100644
--- a/src/TNL/Matrices/Tridiagonal.h
+++ b/src/TNL/Matrices/Tridiagonal.h
@@ -91,8 +91,10 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
       template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
       bool operator != ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const;
 
+      __cuda_callable__
       RowView getRow( const IndexType& rowIdx );
 
+      __cuda_callable__
       const RowView getRow( const IndexType& rowIdx ) const;
 
       void setValue( const RealType& v );
diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp
index d99715a47..2ccdc4838 100644
--- a/src/TNL/Matrices/Tridiagonal.hpp
+++ b/src/TNL/Matrices/Tridiagonal.hpp
@@ -592,6 +592,7 @@ operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAlloca
          this->forAllRows( f );
       }
    }
+   return *this;
 }
 
 template< typename Real,
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
index 590b39881..6d4692dbe 100644
--- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp
+++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
@@ -127,17 +127,12 @@ void test_AllReduction_MaximumInSegments()
 
    TNL::Containers::Vector< IndexType, DeviceType, IndexType > v( segments.getStorageSize() );
 
-   /*IndexType k( 1 );
-   for( IndexType i = 0; i < segmentsCount; i++ )
-      for( IndexType j = 0; j < segmentSize; j++ )
-         v.setElement( segments.getGlobalIndex( i, j ), k++ );*/
    auto view = v.getView();
    auto init = [=] __cuda_callable__ ( const IndexType segmentIdx, const IndexType localIdx, const IndexType globalIdx ) mutable -> bool {
       view[ globalIdx ] =  segmentIdx * 5 + localIdx + 1;
       return true;
    };
    segments.forAll( init );
-   std::cerr << v << std::endl;
 
    TNL::Containers::Vector< IndexType, DeviceType, IndexType >result( segmentsCount );
 
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index 46777f6c0..053f1e9fb 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -458,6 +458,7 @@ void tridiagonalMatrixAssignment()
    RowCapacitiesType rowCapacities;
    matrix.getCompressedRowLengths( rowCapacities );
    RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 };
+
    EXPECT_EQ( rowCapacities, exactRowLengths );
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < columns; j++ )
@@ -510,6 +511,10 @@ void multidiagonalMatrixAssignment()
    RowCapacitiesType rowCapacities;
    matrix.getCompressedRowLengths( rowCapacities );
    RowCapacitiesType exactRowLengths{ 3, 4, 5, 5, 6, 5, 5, 4, 4, 3 };
+   /*std::cerr << "hostMatrix " << hostMatrix << std::endl;
+   std::cerr << "matrix " << matrix << std::endl;
+   std::cerr << "rowCapacities " << rowCapacities << std::endl;*/
+
    EXPECT_EQ( rowCapacities, exactRowLengths );
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < columns; j++ )
-- 
GitLab


From 9ff08661cf33881fd1b7ceda79ddeae9ff43a4ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 23 Jan 2020 22:21:56 +0100
Subject: [PATCH 111/179] Fixing matrix reader - it can throw exceptions.

---
 src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
index 9c58d25b0..ea39d80b7 100644
--- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
+++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
@@ -365,8 +365,7 @@ struct LinearSolversBenchmark
       // load the matrix
       if( ends_with( file_matrix, ".mtx" ) ) {
          Matrices::MatrixReader< MatrixType > reader;
-         if( ! reader.readMtxFile( file_matrix, *matrixPointer ) )
-            return false;
+         reader.readMtxFile( file_matrix, *matrixPointer );
       }
       else {
          matrixPointer->load( file_matrix );
-- 
GitLab


From 488faa318e66ee3667966ca2e72a9a2a1738bea2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 23 Jan 2020 23:05:26 +0100
Subject: [PATCH 112/179] Deleting unused variables.

---
 src/TNL/Matrices/MultidiagonalMatrixView.hpp | 1 -
 src/TNL/Matrices/TridiagonalMatrixView.hpp   | 1 -
 2 files changed, 2 deletions(-)

diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
index 33010cebc..2bd5392df 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
@@ -485,7 +485,6 @@ vectorProduct( const InVector& inVector, OutVector& outVector ) const
 
    const auto inVectorView = inVector.getConstView();
    auto outVectorView = outVector.getView();
-   const auto valuesView = this->values.getConstView();
    auto fetch = [=] __cuda_callable__ ( const IndexType& row, const IndexType& column, const RealType& value ) -> RealType {
       return value * inVectorView[ column ];
    };
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp
index 6e293ffd0..99e3e87d4 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp
@@ -449,7 +449,6 @@ vectorProduct( const InVector& inVector, OutVector& outVector ) const
 
    const auto inVectorView = inVector.getConstView();
    auto outVectorView = outVector.getView();
-   const auto valuesView = this->values.getConstView();
    auto fetch = [=] __cuda_callable__ ( const IndexType& row, const IndexType& column, const RealType& value ) -> RealType {
       return value * inVectorView[ column ];
    };
-- 
GitLab


From 20459748647069bf6fb65140eaa3ec4f86fe5f83 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 23 Jan 2020 23:05:54 +0100
Subject: [PATCH 113/179] Uncommenting sparse matrix assignment code for the
 same devices.

---
 src/TNL/Matrices/SparseMatrix.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index cf4472922..d3641e030 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -688,7 +688,7 @@ operator=( const RHSMatrix& matrix )
    auto rowLocalIndexes_view = rowLocalIndexes.getView();
    columns_view = paddingIndex;
 
-   /*if( std::is_same< DeviceType, RHSDeviceType >::value )
+   if( std::is_same< DeviceType, RHSDeviceType >::value )
    {
       const auto segments_view = this->segments.getView();
       auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
@@ -703,7 +703,7 @@ operator=( const RHSMatrix& matrix )
       };
       matrix.forAllRows( f );
    }
-   else*/
+   else
    {
       const IndexType maxRowLength = max( rowLengths );
       const IndexType bufferRowsCount( 128 );
-- 
GitLab


From 178431c56823ef1dc517db51b4da6f402e0346e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sun, 26 Jan 2020 18:06:13 +0100
Subject: [PATCH 114/179] Fix of Array documentation.

---
 src/TNL/Containers/ArrayView.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h
index b4e063b7e..05e3e9791 100644
--- a/src/TNL/Containers/ArrayView.h
+++ b/src/TNL/Containers/ArrayView.h
@@ -238,7 +238,7 @@ public:
    ArrayView& operator=( const T& array );
 
    /**
-    * \brief Shallow copy of the array view
+    * \brief Makes shallow copy of the array view.
     * 
     * \param view Reference to the source array view.
     * \return Reference to this array view.
-- 
GitLab


From e5020801922e854f2901cbdbfb6235b74fcf1e53 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sun, 26 Jan 2020 18:06:22 +0100
Subject: [PATCH 115/179] Added check against cross device memory acces to
 Array and ArrayView.

---
 src/TNL/Algorithms/CudaScanKernel.h             |  2 +-
 src/TNL/Algorithms/Scan.hpp                     | 10 +++++-----
 src/TNL/Containers/Array.h                      | 10 ++++++++--
 src/TNL/Containers/Array.hpp                    | 12 ++++++++++++
 src/TNL/Containers/ArrayView.h                  | 10 ++++++++--
 src/TNL/Containers/ArrayView.hpp                | 12 ++++++++++++
 src/UnitTests/Containers/DistributedArrayTest.h |  2 +-
 7 files changed, 47 insertions(+), 11 deletions(-)

diff --git a/src/TNL/Algorithms/CudaScanKernel.h b/src/TNL/Algorithms/CudaScanKernel.h
index 79a201959..97912b234 100644
--- a/src/TNL/Algorithms/CudaScanKernel.h
+++ b/src/TNL/Algorithms/CudaScanKernel.h
@@ -277,7 +277,7 @@ struct CudaScanKernelLauncher
               elementsInBlock,
               &deviceInput[ gridOffset ],
               &deviceOutput[ gridOffset ],
-              &blockSums[ gridIdx * maxGridSize() ] );
+              &blockSums.getData()[ gridIdx * maxGridSize() ] );
       }
 
       // synchronize the null-stream after all grids
diff --git a/src/TNL/Algorithms/Scan.hpp b/src/TNL/Algorithms/Scan.hpp
index 7b6d31ece..fc1f2f1e5 100644
--- a/src/TNL/Algorithms/Scan.hpp
+++ b/src/TNL/Algorithms/Scan.hpp
@@ -225,8 +225,8 @@ perform( Vector& v,
 
    CudaScanKernelLauncher< Type, RealType, IndexType >::perform(
       end - begin,
-      &v[ begin ],  // input
-      &v[ begin ],  // output
+      &v.getData()[ begin ],  // input
+      &v.getData()[ begin ],  // output
       reduction,
       zero );
 #else
@@ -251,8 +251,8 @@ performFirstPhase( Vector& v,
 
    return CudaScanKernelLauncher< Type, RealType, IndexType >::performFirstPhase(
       end - begin,
-      &v[ begin ],  // input
-      &v[ begin ],  // output
+      &v.getData()[ begin ],  // input
+      &v.getData()[ begin ],  // output
       reduction,
       zero );
 #else
@@ -279,7 +279,7 @@ performSecondPhase( Vector& v,
 
    CudaScanKernelLauncher< Type, RealType, IndexType >::performSecondPhase(
       end - begin,
-      &v[ begin ],  // output
+      &v.getData()[ begin ],  // output
       blockShifts.getData(),
       reduction,
       shift );
diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h
index 117cb32ae..a9705e66f 100644
--- a/src/TNL/Containers/Array.h
+++ b/src/TNL/Containers/Array.h
@@ -446,7 +446,10 @@ class Array
        * to the memory space where the array was allocated. For example, if the
        * array was allocated in the host memory, it can be called only from
        * host, and if the array was allocated in the device memory, it can be
-       * called only from device kernels.
+       * called only from device kernels. If NDEBUG is not defined, assertions
+       * inside this methods performs runtime checks for cross-device memory
+       * accesses which lead to segmentation fault. If you need to do just a 
+       * pointer arithmetics use \e getData instead.
        *
        * \param i The index of the element to be accessed.
        * \return Reference to the \e i-th element.
@@ -460,7 +463,10 @@ class Array
        * to the memory space where the array was allocated. For example, if the
        * array was allocated in the host memory, it can be called only from
        * host, and if the array was allocated in the device memory, it can be
-       * called only from device kernels.
+       * called only from device kernels. If NDEBUG is not defined, assertions
+       * inside this methods performs runtime checks for cross-device memory
+       * accesses which lead to segmentation fault. If you need to do just a 
+       * pointer arithmetics use \e getData instead.
        *
        * \param i The index of the element to be accessed.
        * \return Constant reference to the \e i-th element.
diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp
index 24e3f8b43..2a60986f5 100644
--- a/src/TNL/Containers/Array.hpp
+++ b/src/TNL/Containers/Array.hpp
@@ -509,6 +509,12 @@ Value&
 Array< Value, Device, Index, Allocator >::
 operator[]( const Index& i )
 {
+#ifdef __CUDA_ARCH__
+   TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+#else
+   TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
+               "Attempt to access data not allocated on the host from the host." );
+#endif
    TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
    return this->data[ i ];
@@ -523,6 +529,12 @@ const Value&
 Array< Value, Device, Index, Allocator >::
 operator[]( const Index& i ) const
 {
+#ifdef __CUDA_ARCH__
+   TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+#else
+   TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
+               "Attempt to access data not allocated on the host from the host." );
+#endif
    TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
    return this->data[ i ];
diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h
index 05e3e9791..af54aef8a 100644
--- a/src/TNL/Containers/ArrayView.h
+++ b/src/TNL/Containers/ArrayView.h
@@ -347,7 +347,10 @@ public:
     * to the memory space where the data was allocated. For example, if the
     * data was allocated in the host memory, it can be called only from
     * host, and if the data was allocated in the device memory, it can be
-    * called only from device kernels.
+    * called only from device kernels. If NDEBUG is not defined, assertions
+    * inside this methods performs runtime checks for cross-device memory
+    * accesses which lead to segmentation fault. If you need to do just a 
+    * pointer arithmetics use \e getData instead.
     *
     * \param i The index of the element to be accessed.
     * \return Reference to the \e i-th element.
@@ -362,7 +365,10 @@ public:
     * to the memory space where the data was allocated. For example, if the
     * data was allocated in the host memory, it can be called only from
     * host, and if the data was allocated in the device memory, it can be
-    * called only from device kernels.
+    * called only from device kernels. If NDEBUG is not defined, assertions
+    * inside this methods performs runtime checks for cross-device memory
+    * accesses which lead to segmentation fault. If you need to do just a 
+    * pointer arithmetics use \e getData instead.
     *
     * \param i The index of the element to be accessed.
     * \return Constant reference to the \e i-th element.
diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp
index 4ef8ac3f6..81e143ac2 100644
--- a/src/TNL/Containers/ArrayView.hpp
+++ b/src/TNL/Containers/ArrayView.hpp
@@ -252,6 +252,12 @@ __cuda_callable__
 Value& ArrayView< Value, Device, Index >::
 operator[]( Index i )
 {
+#ifdef __CUDA_ARCH__
+   TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+#else
+   TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
+               "Attempt to access data not allocated on the host from the host." );
+#endif
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
    return data[ i ];
@@ -265,6 +271,12 @@ const
 Value& ArrayView< Value, Device, Index >::
 operator[]( Index i ) const
 {
+#ifdef __CUDA_ARCH__
+   TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+#else
+   TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
+               "Attempt to access data not allocated on the host from the host." );
+#endif
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
    return data[ i ];
diff --git a/src/UnitTests/Containers/DistributedArrayTest.h b/src/UnitTests/Containers/DistributedArrayTest.h
index f4bd35830..204bc6fe7 100644
--- a/src/UnitTests/Containers/DistributedArrayTest.h
+++ b/src/UnitTests/Containers/DistributedArrayTest.h
@@ -94,7 +94,7 @@ TYPED_TEST( DistributedArrayTest, copyFromGlobal )
    ArrayViewType localArrayView = this->distributedArray.getLocalView();
    auto globalView = globalArray.getConstView();
    const auto localRange = this->distributedArray.getLocalRange();
-   globalView.bind( &globalArray[ localRange.getBegin() ], localRange.getEnd() - localRange.getBegin() );
+   globalView.bind( &globalArray.getData()[ localRange.getBegin() ], localRange.getEnd() - localRange.getBegin() );
    EXPECT_EQ( localArrayView, globalView );
 }
 
-- 
GitLab


From 335a61d58a18fbf8133bdaedc167c3a042e2dd14 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 27 Jan 2020 18:41:29 +0100
Subject: [PATCH 116/179] Fixed distributed matrix.

---
 src/TNL/Matrices/DistributedSpMV.h        |  2 +-
 src/TNL/Matrices/ThreePartVector.h        | 22 ++++++++++++++++++++++
 src/UnitTests/Matrices/SparseMatrixTest.h |  2 +-
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/TNL/Matrices/DistributedSpMV.h b/src/TNL/Matrices/DistributedSpMV.h
index 8460ded4d..a4711dce0 100644
--- a/src/TNL/Matrices/DistributedSpMV.h
+++ b/src/TNL/Matrices/DistributedSpMV.h
@@ -176,7 +176,7 @@ public:
              continue;
          if( commPatternStarts( rank, j ) < commPatternEnds( rank, j ) )
             commRequests.push_back( CommunicatorType::IRecv(
-                     &globalBuffer[ commPatternStarts( rank, j ) ],
+                     globalBuffer.getPointer( commPatternStarts( rank, j ) ),
                      commPatternEnds( rank, j ) - commPatternStarts( rank, j ),
                      j, 0, group ) );
       }
diff --git a/src/TNL/Matrices/ThreePartVector.h b/src/TNL/Matrices/ThreePartVector.h
index f57e3e116..f28f544f5 100644
--- a/src/TNL/Matrices/ThreePartVector.h
+++ b/src/TNL/Matrices/ThreePartVector.h
@@ -75,6 +75,17 @@ public:
          return right[ i - left.getSize() - middle.getSize() ];
    }
 
+   __cuda_callable__
+   const Real* getPointer( Index i ) const
+   {
+      if( i < left.getSize() )
+         return &left.getData()[ i ];
+      else if( i < left.getSize() + middle.getSize() )
+         return &middle.getData()[ i - left.getSize() ];
+      else
+         return &right.getData()[ i - left.getSize() - middle.getSize() ];
+   }
+
    friend std::ostream& operator<<( std::ostream& str, const ThreePartVectorView& v )
    {
       str << "[\n\tleft: " << v.left << ",\n\tmiddle: " << v.middle << ",\n\tright: " << v.right << "\n]";
@@ -143,6 +154,17 @@ public:
          return right[ i - left.getSize() - middle.getSize() ];
    }
 
+   __cuda_callable__
+   const Real* getPointer( Index i ) const
+   {
+      if( i < left.getSize() )
+         return &left.getData()[ i ];
+      else if( i < left.getSize() + middle.getSize() )
+         return &middle.getData()[ i - left.getSize() ];
+      else
+         return &right.getData()[ i - left.getSize() - middle.getSize() ];
+   }
+
    friend std::ostream& operator<<( std::ostream& str, const ThreePartVector& v )
    {
       str << "[\n\tleft: " << v.left << ",\n\tmiddle: " << v.middle << ",\n\tright: " << v.right << "\n]";
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h
index 8b1d57566..b08d66c33 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest.h
@@ -24,7 +24,7 @@ using CSR_cuda_int = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >;
 
 TEST( SparseMatrixTest, CSR_perforSORIterationTest_Host )
 {
-    test_PerformSORIteration< CSR_host_float >();
+    //test_PerformSORIteration< CSR_host_float >();
 }
 
 #ifdef HAVE_CUDA
-- 
GitLab


From ae650de9553307195fecd2eb1ec35d160b99110b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 27 Jan 2020 20:56:01 +0100
Subject: [PATCH 117/179] Added sparse matrix values holder.

---
 src/TNL/Matrices/details/ValuesHolder.h | 78 +++++++++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 src/TNL/Matrices/details/ValuesHolder.h

diff --git a/src/TNL/Matrices/details/ValuesHolder.h b/src/TNL/Matrices/details/ValuesHolder.h
new file mode 100644
index 000000000..e54ec8026
--- /dev/null
+++ b/src/TNL/Matrices/details/ValuesHolder.h
@@ -0,0 +1,78 @@
+/***************************************************************************
+                          ValuesHolder.h  -  description
+                             -------------------
+    begin                : Jan 27, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Matrices {
+      namespace details {
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+class ValuesHolder
+: public Containers::Vector< Real, Device, Index, RealAllocator >
+{};
+
+template< typename Device,
+          typename Index,
+          typename RealAllocator >
+class ValuesHolder< bool, Device, Index, RealAllocator >
+{
+   public:
+
+      using RealType = bool;
+      using DeviceType = Device;
+      using IndexType = Index;
+
+      ValuesHolder()
+      : size( 0 ){};
+
+      ValuesdHolder( const IndexType& size )
+      : size( size ){};
+
+      void setSize( const IndexType& size ) { this->size = size; };
+
+      __cuda_callable__
+      IndexType getSize() const { return this->size; };
+
+      __cuda_callable__
+      bool operator[]( const IndexType& i ) const { return true; };
+
+      
+   protected:
+
+      IndexType size;
+
+};
+
+/**
+ * \brief Serialization of arrays into binary files.
+ */
+template< typename Device, typename Index, typename Allocator >
+File& operator<<( File& file, const ValuesHolder< bool, Device, Index, Allocator >& array ) { return file; };
+
+template< typename Device, typename Index, typename Allocator >
+File& operator<<( File&& file, const ValuesHolder< bool, Device, Index, Allocator >& array ) { return file; };
+
+/**
+ * \brief Deserialization of arrays from binary files.
+ */
+template< typename Device, typename Index, typename Allocator >
+File& operator>>( File& file, ValuesHolder< bool, Device, Index, Allocator >& array ) { return file; };
+
+template< typename Device, typename Index, typename Allocator >
+File& operator>>( File&& file, ValuesHolder< bool, Device, Index, Allocator >& array ) { return file; };
+
+
+      } //namespace details
+   } //namepsace Matrices
+} //namespace TNL
\ No newline at end of file
-- 
GitLab


From 888869bdf2467730c55c4a343738f322f44b9806 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Tue, 28 Jan 2020 21:19:13 +0100
Subject: [PATCH 118/179] Adding matrix values holder.

---
 src/TNL/Matrices/Legacy/Sparse.h        |   2 +-
 src/TNL/Matrices/Matrix.h               |  12 +--
 src/TNL/Matrices/Matrix.hpp             | 108 ++++++++++++++----------
 src/TNL/Matrices/details/ValuesHolder.h |  62 +++++++++++---
 4 files changed, 120 insertions(+), 64 deletions(-)

diff --git a/src/TNL/Matrices/Legacy/Sparse.h b/src/TNL/Matrices/Legacy/Sparse.h
index 12c76a6a0..8970b182e 100644
--- a/src/TNL/Matrices/Legacy/Sparse.h
+++ b/src/TNL/Matrices/Legacy/Sparse.h
@@ -26,7 +26,7 @@ class Sparse : public Matrix< Real, Device, Index >
    typedef Real RealType;
    typedef Device DeviceType;
    typedef Index IndexType;
-   typedef typename Matrix< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
+   typedef typename Matrix< RealType, DeviceType, IndexType >::ValuesHolderType ValuesVector;
    typedef Containers::Vector< IndexType, DeviceType, IndexType > ColumnIndexesVector;
    typedef Matrix< Real, Device, Index > BaseType;
    typedef SparseRow< RealType, IndexType > MatrixRow;
diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index a5f2b6b8f..b30a28718 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -16,6 +16,7 @@
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/VectorView.h>
 #include <TNL/Matrices/MatrixView.h>
+#include <TNL/Matrices/details/ValuesHolder.h>
 
 namespace TNL {
 /**
@@ -26,7 +27,8 @@ namespace Matrices {
 template< typename Real = double,
           typename Device = Devices::Host,
           typename Index = int,
-          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
+          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >,
+          typename ValuesHolder = typename details::ValuesHolder< Real, Device, Index, RealAllocator > >
 class Matrix : public Object
 {
 public:
@@ -36,7 +38,7 @@ public:
    using CompressedRowLengthsVector = Containers::Vector< IndexType, DeviceType, IndexType >;
    using CompressedRowLengthsVectorView = Containers::VectorView< IndexType, DeviceType, IndexType >;
    using ConstCompressedRowLengthsVectorView = typename CompressedRowLengthsVectorView::ConstViewType;
-   using ValuesVector = Containers::Vector< RealType, DeviceType, IndexType, RealAllocator >;
+   using ValuesHolderType = ValuesHolder;
    using RealAllocatorType = RealAllocator;
    using ViewType = MatrixView< Real, Device, Index >;
    using ConstViewType = MatrixView< std::add_const_t< Real >, Device, Index >;
@@ -90,9 +92,9 @@ public:
    virtual Real getElement( const IndexType row,
                             const IndexType column ) const = 0;
 
-   const ValuesVector& getValues() const;
+   const ValuesHolderType& getValues() const;
 
-   ValuesVector& getValues();
+   ValuesHolderType& getValues();
 
    // TODO: parallelize and optimize for sparse matrices
    template< typename Matrix >
@@ -131,7 +133,7 @@ public:
 
    IndexType rows, columns, numberOfColors;
 
-   ValuesVector values;
+   ValuesHolderType values;
 };
 
 template< typename Real, typename Device, typename Index >
diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp
index 2d5906d23..29dedcf2b 100644
--- a/src/TNL/Matrices/Matrix.hpp
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -22,20 +22,22 @@ namespace Matrices {
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
-Matrix< Real, Device, Index, RealAllocator >::
+          typename RealAllocator,
+          typename ValuesHolder >
+Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::
 Matrix( const RealAllocatorType& allocator )
 : rows( 0 ),
   columns( 0 ),
-   values( allocator )
+  values( allocator )
 {
 }
 
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
-Matrix< Real, Device, Index, RealAllocator >::
+          typename RealAllocator,
+          typename ValuesHolder >
+Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::
 Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType& allocator )
 : rows( rows_ ),
   columns( columns_ ),
@@ -46,8 +48,9 @@ Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
-void Matrix< Real, Device, Index, RealAllocator >::setDimensions( const IndexType rows,
+          typename RealAllocator,
+          typename ValuesHolder >
+void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::setDimensions( const IndexType rows,
                                                    const IndexType columns )
 {
    TNL_ASSERT( rows > 0 && columns > 0,
@@ -60,7 +63,7 @@ void Matrix< Real, Device, Index, RealAllocator >::setDimensions( const IndexTyp
           typename Device,
           typename Index,
           typename RealAllocator >
-void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const
+void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const
 {
    rowLengths.setSize( this->getRows() );
    getCompressedRowLengths( rowLengths.getView() );
@@ -69,8 +72,9 @@ void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( Comp
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
-void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
+          typename RealAllocator,
+          typename ValuesHolder >
+void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
 {
    TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
    for( IndexType row = 0; row < this->getRows(); row++ )
@@ -80,9 +84,10 @@ void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( Comp
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
+          typename RealAllocator,
+          typename ValuesHolder >
    template< typename Matrix_ >
-void Matrix< Real, Device, Index, RealAllocator >::setLike( const Matrix_& matrix )
+void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::setLike( const Matrix_& matrix )
 {
    setDimensions( matrix.getRows(), matrix.getColumns() );
 }
@@ -90,8 +95,9 @@ void Matrix< Real, Device, Index, RealAllocator >::setLike( const Matrix_& matri
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
-Index Matrix< Real, Device, Index, RealAllocator >::getAllocatedElementsCount() const
+          typename RealAllocator,
+          typename ValuesHolder >
+Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getAllocatedElementsCount() const
 {
    return this->values.getSize();
 }
@@ -99,8 +105,9 @@ Index Matrix< Real, Device, Index, RealAllocator >::getAllocatedElementsCount()
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
-Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfNonzeroMatrixElements() const
+          typename RealAllocator,
+          typename ValuesHolder >
+Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getNumberOfNonzeroMatrixElements() const
 {
    const auto values_view = this->values.getConstView();
    auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
@@ -112,9 +119,10 @@ Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfNonzeroMatrixElem
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
+          typename RealAllocator,
+          typename ValuesHolder >
 __cuda_callable__
-Index Matrix< Real, Device, Index, RealAllocator >::getRows() const
+Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getRows() const
 {
    return this->rows;
 }
@@ -122,9 +130,10 @@ Index Matrix< Real, Device, Index, RealAllocator >::getRows() const
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
+          typename RealAllocator,
+          typename ValuesHolder >
 __cuda_callable__
-Index Matrix< Real, Device, Index, RealAllocator >::getColumns() const
+Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getColumns() const
 {
    return this->columns;
 }
@@ -132,9 +141,10 @@ Index Matrix< Real, Device, Index, RealAllocator >::getColumns() const
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
-const typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector&
-Matrix< Real, Device, Index, RealAllocator >::
+          typename RealAllocator,
+          typename ValuesHolder >
+const typename Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::ValuesHolderType&
+Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::
 getValues() const
 {
    return this->values;
@@ -143,9 +153,10 @@ getValues() const
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
-typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector&
-Matrix< Real, Device, Index, RealAllocator >::
+          typename RealAllocator,
+          typename ValuesHolder >
+typename Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::ValuesHolderType&
+Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::
 getValues()
 {
    return this->values;
@@ -154,8 +165,9 @@ getValues()
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
-void Matrix< Real, Device, Index, RealAllocator >::reset()
+          typename RealAllocator,
+          typename ValuesHolder >
+void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::reset()
 {
    this->rows = 0;
    this->columns = 0;
@@ -165,9 +177,10 @@ void Matrix< Real, Device, Index, RealAllocator >::reset()
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
+          typename RealAllocator,
+          typename ValuesHolder >
    template< typename MatrixT >
-bool Matrix< Real, Device, Index, RealAllocator >::operator == ( const MatrixT& matrix ) const
+bool Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::operator == ( const MatrixT& matrix ) const
 {
    if( this->getRows() != matrix.getRows() ||
        this->getColumns() != matrix.getColumns() )
@@ -182,9 +195,10 @@ bool Matrix< Real, Device, Index, RealAllocator >::operator == ( const MatrixT&
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
+          typename RealAllocator,
+          typename ValuesHolder >
    template< typename MatrixT >
-bool Matrix< Real, Device, Index, RealAllocator >::operator != ( const MatrixT& matrix ) const
+bool Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::operator != ( const MatrixT& matrix ) const
 {
    return ! operator == ( matrix );
 }
@@ -192,8 +206,9 @@ bool Matrix< Real, Device, Index, RealAllocator >::operator != ( const MatrixT&
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
-void Matrix< Real, Device, Index, RealAllocator >::save( File& file ) const
+          typename RealAllocator,
+          typename ValuesHolder >
+void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::save( File& file ) const
 {
    Object::save( file );
    file.save( &this->rows );
@@ -204,8 +219,9 @@ void Matrix< Real, Device, Index, RealAllocator >::save( File& file ) const
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
-void Matrix< Real, Device, Index, RealAllocator >::load( File& file )
+          typename RealAllocator,
+          typename ValuesHolder >
+void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::load( File& file )
 {
    Object::load( file );
    file.load( &this->rows );
@@ -216,18 +232,20 @@ void Matrix< Real, Device, Index, RealAllocator >::load( File& file )
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
-void Matrix< Real, Device, Index, RealAllocator >::print( std::ostream& str ) const
+          typename RealAllocator,
+          typename ValuesHolder >
+void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::print( std::ostream& str ) const
 {
 }
 
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
+          typename RealAllocator,
+          typename ValuesHolder >
 __cuda_callable__
 const Index&
-Matrix< Real, Device, Index, RealAllocator >::
+Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::
 getNumberOfColors() const
 {
    return this->numberOfColors;
@@ -236,9 +254,10 @@ getNumberOfColors() const
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
+          typename RealAllocator,
+          typename ValuesHolder >
 void
-Matrix< Real, Device, Index, RealAllocator >::
+Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::
 computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector)
 {
     for( IndexType i = this->getRows() - 1; i >= 0; i-- )
@@ -274,9 +293,10 @@ computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector)
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
+          typename RealAllocator,
+          typename ValuesHolder >
 void
-Matrix< Real, Device, Index, RealAllocator >::
+Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::
 copyFromHostToCuda( Matrix< Real, Devices::Host, Index >& matrix )
 {
     this->numberOfColors = matrix.getNumberOfColors();
diff --git a/src/TNL/Matrices/details/ValuesHolder.h b/src/TNL/Matrices/details/ValuesHolder.h
index e54ec8026..9b4ab7540 100644
--- a/src/TNL/Matrices/details/ValuesHolder.h
+++ b/src/TNL/Matrices/details/ValuesHolder.h
@@ -23,9 +23,8 @@ class ValuesHolder
 {};
 
 template< typename Device,
-          typename Index,
-          typename RealAllocator >
-class ValuesHolder< bool, Device, Index, RealAllocator >
+          typename Index >
+class BooleanValuesHolder
 {
    public:
 
@@ -33,10 +32,10 @@ class ValuesHolder< bool, Device, Index, RealAllocator >
       using DeviceType = Device;
       using IndexType = Index;
 
-      ValuesHolder()
+      BooleanValuesHolder()
       : size( 0 ){};
 
-      ValuesdHolder( const IndexType& size )
+      BooleanValuesHolder( const IndexType& size )
       : size( size ){};
 
       void setSize( const IndexType& size ) { this->size = size; };
@@ -47,32 +46,67 @@ class ValuesHolder< bool, Device, Index, RealAllocator >
       __cuda_callable__
       bool operator[]( const IndexType& i ) const { return true; };
 
-      
    protected:
 
       IndexType size;
-
 };
 
 /**
- * \brief Serialization of arrays into binary files.
+ * \brief Serialization of values holder into binary files.
  */
 template< typename Device, typename Index, typename Allocator >
-File& operator<<( File& file, const ValuesHolder< bool, Device, Index, Allocator >& array ) { return file; };
+File& operator<<( File& file, const ValuesHolder< bool, Device, Index, Allocator >& holder ) {
+   file << holder.getSize();
+   return file; };
 
 template< typename Device, typename Index, typename Allocator >
-File& operator<<( File&& file, const ValuesHolder< bool, Device, Index, Allocator >& array ) { return file; };
+File& operator<<( File&& file, const ValuesHolder< bool, Device, Index, Allocator >& holder ) {
+   file << holder.getSize();
+   return file; };
 
 /**
- * \brief Deserialization of arrays from binary files.
+ * \brief Deserialization of values holder from binary files.
  */
 template< typename Device, typename Index, typename Allocator >
-File& operator>>( File& file, ValuesHolder< bool, Device, Index, Allocator >& array ) { return file; };
+File& operator>>( File& file, ValuesHolder< bool, Device, Index, Allocator >& holder ) {
+   Index size;
+   file >> size;
+   holder.setSize( size );
+   return file; };
 
 template< typename Device, typename Index, typename Allocator >
-File& operator>>( File&& file, ValuesHolder< bool, Device, Index, Allocator >& array ) { return file; };
+File& operator>>( File&& file, ValuesHolder< bool, Device, Index, Allocator >& holder ) {
+   Index size;
+   file >> size;
+   holder.setSize( size );
+   return file; };
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+struct ValuesHolderSetter
+{
+   using type = ValuesHolder< Real, Device, Index, RealAllocator >;
+};
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+struct SparseMatrixValuesHolderSetter
+{
+   using type = ValuesHolder< Real, Device, Index, RealAllocator >;
+};
 
+template< typename Device,
+          typename Index,
+          typename RealAllocator >
+struct SparseMatrixValuesHolderSetter< bool, Device, Index, RealAllocator >
+{
+   using type = BooleanValuesHolder< Device, Index >;
+};
 
       } //namespace details
    } //namepsace Matrices
-} //namespace TNL
\ No newline at end of file
+} //namespace TNL
-- 
GitLab


From 4a70c74f04beae9c9d204e4acbb1d6b34116959d Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 29 Jan 2020 17:29:17 +0100
Subject: [PATCH 119/179] Added matrix values holder.

---
 src/TNL/Matrices/Dense.h                |  4 +-
 src/TNL/Matrices/DenseMatrixView.h      |  4 +-
 src/TNL/Matrices/Multidiagonal.h        |  4 +-
 src/TNL/Matrices/SparseMatrix.h         |  4 +-
 src/TNL/Matrices/Tridiagonal.h          |  4 +-
 src/TNL/Matrices/details/ValuesHolder.h | 50 +++++++++++++++++++++++--
 6 files changed, 56 insertions(+), 14 deletions(-)

diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index 8c109ac1e..cee69688b 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -36,8 +36,8 @@ class Dense : public Matrix< Real, Device, Index >
       using IndexType = Index;
       using RealAllocatorType = RealAllocator;
       using BaseType = Matrix< Real, Device, Index, RealAllocator >;
-      using ValuesType = typename BaseType::ValuesVector;
-      using ValuesViewType = typename ValuesType::ViewType;
+      using ValuesHolderType = typename BaseType::ValuesHolderType;
+      using ValuesViewType = typename ValuesHolderType::ViewType;
       using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >;
       using SegmentViewType = typename SegmentsType::SegmentViewType;
       using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >;
diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h
index d963dd7c3..c5771f2ee 100644
--- a/src/TNL/Matrices/DenseMatrixView.h
+++ b/src/TNL/Matrices/DenseMatrixView.h
@@ -39,8 +39,8 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
       using DeviceType = Device;
       using IndexType = Index;
       using BaseType = Matrix< Real, Device, Index >;
-      using ValuesType = typename BaseType::ValuesVector;
-      using ValuesViewType = typename ValuesType::ViewType;
+      using ValuesHolderType = typename BaseType::ValuesHolderType;
+      using ValuesViewType = typename ValuesHolderType::ViewType;
       using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >;
       using SegmentsViewType = typename SegmentsType::ViewType;
       using SegmentViewType = typename SegmentsType::SegmentViewType;
diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h
index 749ddfae7..c93dc7d9c 100644
--- a/src/TNL/Matrices/Multidiagonal.h
+++ b/src/TNL/Matrices/Multidiagonal.h
@@ -35,8 +35,8 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator >
       using RealAllocatorType = RealAllocator;
       using IndexAllocatorType = IndexAllocator;
       using BaseType = Matrix< Real, Device, Index, RealAllocator >;
-      using ValuesType = typename BaseType::ValuesVector;
-      using ValuesViewType = typename ValuesType::ViewType;
+      using ValuesHolderType = typename BaseType::ValuesHolderType;
+      using ValuesViewType = typename ValuesHolderType::ViewType;
       using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, RowMajorOrder >;
       using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
       using DiagonalsShiftsView = typename DiagonalsShiftsType::ViewType;
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 9f91ee7d1..ffcccfade 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -46,8 +46,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       using RowsCapacitiesType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
       using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >;
       using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
-      using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector;
-      using ValuesViewType = typename ValuesVectorType::ViewType;
+      using ValuesHolderType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesHolderType;
+      using ValuesViewType = typename ValuesHolderType::ViewType;
       using ColumnsIndexesVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
       using ColumnsIndexesViewType = typename ColumnsIndexesVectorType::ViewType;
       using ViewType = SparseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >;
diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h
index 6f0c6a548..b65cfb527 100644
--- a/src/TNL/Matrices/Tridiagonal.h
+++ b/src/TNL/Matrices/Tridiagonal.h
@@ -34,8 +34,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
       using RealAllocatorType = RealAllocator;
       using BaseType = Matrix< Real, Device, Index, RealAllocator >;
       using IndexerType = details::TridiagonalMatrixIndexer< IndexType, RowMajorOrder >;
-      using ValuesType = typename BaseType::ValuesVector;
-      using ValuesViewType = typename ValuesType::ViewType;
+      using ValuesHolderType = typename BaseType::ValuesHolderType;
+      using ValuesViewType = typename ValuesHolderType::ViewType;
       using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >;
       using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
       using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >;
diff --git a/src/TNL/Matrices/details/ValuesHolder.h b/src/TNL/Matrices/details/ValuesHolder.h
index 9b4ab7540..c76f0d344 100644
--- a/src/TNL/Matrices/details/ValuesHolder.h
+++ b/src/TNL/Matrices/details/ValuesHolder.h
@@ -14,13 +14,54 @@ namespace TNL {
    namespace Matrices {
       namespace details {
 
+
+template< typename Real,
+          typename Device,
+          typename Index >
+struct ValuesHolderView
+: public Containers::VectorView< Real, Device, Index >
+{
+   using RealType = Real;
+   using DeviceType = Device;
+   using IndexType = Index;
+
+   using Containers::VectorView< Real, Device, Index >::VectorView;
+   using Containers::VectorView< Real, Device, Index >::operator=;
+   /*__cuda_callable__
+   ValuesHolderView() = default;
+
+   __cuda_callable__
+   explicit ValuesHolderView( const ValuesHolderView& ) = default;
+
+   __cuda_callable__
+   ValuesHolderView( ValuesHolderView&& ) = default;*/
+
+};
+
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator >
-class ValuesHolder
-: public Containers::Vector< Real, Device, Index, RealAllocator >
-{};
+          typename Allocator >
+struct ValuesHolder
+: public Containers::Vector< Real, Device, Index, Allocator >
+{
+   using RealType = Real;
+   using DeviceType = Device;
+   using IndexType = Index;
+   using AllocatorType = Allocator;
+   using ViewType = ValuesHolderView< Real, Device, Index >;
+
+   using Containers::Vector< Real, Device, Index, Allocator >::Vector;
+   using Containers::Vector< Real, Device, Index, Allocator >::operator=;
+   /*ValuesHolder() = default;
+
+   explicit ValuesHolder( const ValuesHolder& ) = default;
+
+   explicit ValuesHolder( const ValuesHolder& vector, const AllocatorType& allocator );
+
+   ValuesHolder( ValuesHolder&& ) = default;*/
+
+};
 
 template< typename Device,
           typename Index >
@@ -31,6 +72,7 @@ class BooleanValuesHolder
       using RealType = bool;
       using DeviceType = Device;
       using IndexType = Index;
+      using ViewType = BooleanValuesHolder;
 
       BooleanValuesHolder()
       : size( 0 ){};
-- 
GitLab


From d502b0758edaf8b11c6a6a525e558efcd53918bd Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Fri, 31 Jan 2020 10:46:15 +0100
Subject: [PATCH 120/179] ValuesHolder for matrices was not good idea :-).

---
 src/TNL/Matrices/Dense.h                      |    4 +-
 src/TNL/Matrices/DenseMatrixView.h            |    4 +-
 src/TNL/Matrices/Legacy/Sparse.h              |    2 +-
 src/TNL/Matrices/Matrix.h                     |   12 +-
 src/TNL/Matrices/Matrix.hpp                   |  106 +-
 src/TNL/Matrices/MatrixView.h                 |    4 +-
 src/TNL/Matrices/MatrixView.hpp               |   26 +-
 src/TNL/Matrices/Multidiagonal.hpp            |    5 +-
 src/TNL/Matrices/SparseMatrix.h               |   11 +-
 src/TNL/Matrices/SparseMatrix.hpp             |   17 +-
 src/TNL/Matrices/SparseMatrixRowView.h        |    9 +-
 src/TNL/Matrices/SparseMatrixRowView.hpp      |   40 +-
 src/TNL/Matrices/SparseMatrixView.h           |    8 +-
 src/TNL/Matrices/Tridiagonal.hpp              |    1 -
 src/TNL/Matrices/details/ValuesHolder.h       |  154 --
 .../Matrices/BinarySparseMatrixTest.cpp       |   11 +
 .../Matrices/BinarySparseMatrixTest.cu        |   11 +
 .../Matrices/BinarySparseMatrixTest.h         |  117 ++
 .../Matrices/BinarySparseMatrixTest.hpp       | 1573 +++++++++++++++++
 src/UnitTests/Matrices/CMakeLists.txt         |    8 +
 20 files changed, 1845 insertions(+), 278 deletions(-)
 delete mode 100644 src/TNL/Matrices/details/ValuesHolder.h
 create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest.cpp
 create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest.cu
 create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest.h
 create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest.hpp

diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index cee69688b..ada48ee02 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -36,8 +36,8 @@ class Dense : public Matrix< Real, Device, Index >
       using IndexType = Index;
       using RealAllocatorType = RealAllocator;
       using BaseType = Matrix< Real, Device, Index, RealAllocator >;
-      using ValuesHolderType = typename BaseType::ValuesHolderType;
-      using ValuesViewType = typename ValuesHolderType::ViewType;
+      using ValuesVectorType = typename BaseType::ValuesVectorType;
+      using ValuesViewType = typename ValuesVectorType::ViewType;
       using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >;
       using SegmentViewType = typename SegmentsType::SegmentViewType;
       using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >;
diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h
index c5771f2ee..95a7c4769 100644
--- a/src/TNL/Matrices/DenseMatrixView.h
+++ b/src/TNL/Matrices/DenseMatrixView.h
@@ -39,8 +39,8 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
       using DeviceType = Device;
       using IndexType = Index;
       using BaseType = Matrix< Real, Device, Index >;
-      using ValuesHolderType = typename BaseType::ValuesHolderType;
-      using ValuesViewType = typename ValuesHolderType::ViewType;
+      using ValuesVectorType = typename BaseType::ValuesVectorType;
+      using ValuesViewType = typename ValuesVectorType::ViewType;
       using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >;
       using SegmentsViewType = typename SegmentsType::ViewType;
       using SegmentViewType = typename SegmentsType::SegmentViewType;
diff --git a/src/TNL/Matrices/Legacy/Sparse.h b/src/TNL/Matrices/Legacy/Sparse.h
index 8970b182e..4de00cb2e 100644
--- a/src/TNL/Matrices/Legacy/Sparse.h
+++ b/src/TNL/Matrices/Legacy/Sparse.h
@@ -26,7 +26,7 @@ class Sparse : public Matrix< Real, Device, Index >
    typedef Real RealType;
    typedef Device DeviceType;
    typedef Index IndexType;
-   typedef typename Matrix< RealType, DeviceType, IndexType >::ValuesHolderType ValuesVector;
+   typedef typename Matrix< RealType, DeviceType, IndexType >::ValuesVectorType ValuesVector;
    typedef Containers::Vector< IndexType, DeviceType, IndexType > ColumnIndexesVector;
    typedef Matrix< Real, Device, Index > BaseType;
    typedef SparseRow< RealType, IndexType > MatrixRow;
diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index b30a28718..cf61f9efa 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -16,7 +16,6 @@
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/VectorView.h>
 #include <TNL/Matrices/MatrixView.h>
-#include <TNL/Matrices/details/ValuesHolder.h>
 
 namespace TNL {
 /**
@@ -27,8 +26,7 @@ namespace Matrices {
 template< typename Real = double,
           typename Device = Devices::Host,
           typename Index = int,
-          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >,
-          typename ValuesHolder = typename details::ValuesHolder< Real, Device, Index, RealAllocator > >
+          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
 class Matrix : public Object
 {
 public:
@@ -38,7 +36,7 @@ public:
    using CompressedRowLengthsVector = Containers::Vector< IndexType, DeviceType, IndexType >;
    using CompressedRowLengthsVectorView = Containers::VectorView< IndexType, DeviceType, IndexType >;
    using ConstCompressedRowLengthsVectorView = typename CompressedRowLengthsVectorView::ConstViewType;
-   using ValuesHolderType = ValuesHolder;
+   using ValuesVectorType = Containers::Vector< Real, Device, Index, RealAllocator >;
    using RealAllocatorType = RealAllocator;
    using ViewType = MatrixView< Real, Device, Index >;
    using ConstViewType = MatrixView< std::add_const_t< Real >, Device, Index >;
@@ -92,9 +90,9 @@ public:
    virtual Real getElement( const IndexType row,
                             const IndexType column ) const = 0;
 
-   const ValuesHolderType& getValues() const;
+   const ValuesVectorType& getValues() const;
 
-   ValuesHolderType& getValues();
+   ValuesVectorType& getValues();
 
    // TODO: parallelize and optimize for sparse matrices
    template< typename Matrix >
@@ -133,7 +131,7 @@ public:
 
    IndexType rows, columns, numberOfColors;
 
-   ValuesHolderType values;
+   ValuesVectorType values;
 };
 
 template< typename Real, typename Device, typename Index >
diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp
index 29dedcf2b..0236f94f7 100644
--- a/src/TNL/Matrices/Matrix.hpp
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -22,9 +22,8 @@ namespace Matrices {
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
-Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::
+          typename RealAllocator >
+Matrix< Real, Device, Index, RealAllocator >::
 Matrix( const RealAllocatorType& allocator )
 : rows( 0 ),
   columns( 0 ),
@@ -35,9 +34,8 @@ Matrix( const RealAllocatorType& allocator )
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
-Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::
+          typename RealAllocator >
+Matrix< Real, Device, Index, RealAllocator >::
 Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType& allocator )
 : rows( rows_ ),
   columns( columns_ ),
@@ -48,9 +46,8 @@ Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
-void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::setDimensions( const IndexType rows,
+          typename RealAllocator >
+void Matrix< Real, Device, Index, RealAllocator >::setDimensions( const IndexType rows,
                                                    const IndexType columns )
 {
    TNL_ASSERT( rows > 0 && columns > 0,
@@ -63,7 +60,7 @@ void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::setDimensions(
           typename Device,
           typename Index,
           typename RealAllocator >
-void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const
+void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const
 {
    rowLengths.setSize( this->getRows() );
    getCompressedRowLengths( rowLengths.getView() );
@@ -72,9 +69,8 @@ void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getCompressedRo
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
-void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
+          typename RealAllocator >
+void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
 {
    TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
    for( IndexType row = 0; row < this->getRows(); row++ )
@@ -84,10 +80,9 @@ void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getCompressedRo
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
+          typename RealAllocator >
    template< typename Matrix_ >
-void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::setLike( const Matrix_& matrix )
+void Matrix< Real, Device, Index, RealAllocator >::setLike( const Matrix_& matrix )
 {
    setDimensions( matrix.getRows(), matrix.getColumns() );
 }
@@ -95,9 +90,8 @@ void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::setLike( const
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
-Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getAllocatedElementsCount() const
+          typename RealAllocator >
+Index Matrix< Real, Device, Index, RealAllocator >::getAllocatedElementsCount() const
 {
    return this->values.getSize();
 }
@@ -105,9 +99,8 @@ Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getAllocatedEl
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
-Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getNumberOfNonzeroMatrixElements() const
+          typename RealAllocator >
+Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfNonzeroMatrixElements() const
 {
    const auto values_view = this->values.getConstView();
    auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
@@ -119,10 +112,9 @@ Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getNumberOfNon
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
+          typename RealAllocator >
 __cuda_callable__
-Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getRows() const
+Index Matrix< Real, Device, Index, RealAllocator >::getRows() const
 {
    return this->rows;
 }
@@ -130,10 +122,9 @@ Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getRows() cons
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
+          typename RealAllocator >
 __cuda_callable__
-Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getColumns() const
+Index Matrix< Real, Device, Index, RealAllocator >::getColumns() const
 {
    return this->columns;
 }
@@ -141,10 +132,9 @@ Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getColumns() c
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
-const typename Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::ValuesHolderType&
-Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::
+          typename RealAllocator >
+const typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType&
+Matrix< Real, Device, Index, RealAllocator >::
 getValues() const
 {
    return this->values;
@@ -153,10 +143,9 @@ getValues() const
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
-typename Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::ValuesHolderType&
-Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::
+          typename RealAllocator >
+typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType&
+Matrix< Real, Device, Index, RealAllocator >::
 getValues()
 {
    return this->values;
@@ -165,9 +154,8 @@ getValues()
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
-void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::reset()
+          typename RealAllocator >
+void Matrix< Real, Device, Index, RealAllocator >::reset()
 {
    this->rows = 0;
    this->columns = 0;
@@ -177,10 +165,9 @@ void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::reset()
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
+          typename RealAllocator >
    template< typename MatrixT >
-bool Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::operator == ( const MatrixT& matrix ) const
+bool Matrix< Real, Device, Index, RealAllocator >::operator == ( const MatrixT& matrix ) const
 {
    if( this->getRows() != matrix.getRows() ||
        this->getColumns() != matrix.getColumns() )
@@ -195,10 +182,9 @@ bool Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::operator == ( c
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
+          typename RealAllocator >
    template< typename MatrixT >
-bool Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::operator != ( const MatrixT& matrix ) const
+bool Matrix< Real, Device, Index, RealAllocator >::operator != ( const MatrixT& matrix ) const
 {
    return ! operator == ( matrix );
 }
@@ -206,9 +192,8 @@ bool Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::operator != ( c
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
-void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::save( File& file ) const
+          typename RealAllocator >
+void Matrix< Real, Device, Index, RealAllocator >::save( File& file ) const
 {
    Object::save( file );
    file.save( &this->rows );
@@ -219,9 +204,8 @@ void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::save( File& fil
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
-void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::load( File& file )
+          typename RealAllocator >
+void Matrix< Real, Device, Index, RealAllocator >::load( File& file )
 {
    Object::load( file );
    file.load( &this->rows );
@@ -232,20 +216,18 @@ void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::load( File& fil
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
-void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::print( std::ostream& str ) const
+          typename RealAllocator >
+void Matrix< Real, Device, Index, RealAllocator >::print( std::ostream& str ) const
 {
 }
 
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
+          typename RealAllocator >
 __cuda_callable__
 const Index&
-Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::
+Matrix< Real, Device, Index, RealAllocator >::
 getNumberOfColors() const
 {
    return this->numberOfColors;
@@ -254,10 +236,9 @@ getNumberOfColors() const
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
+          typename RealAllocator >
 void
-Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::
+Matrix< Real, Device, Index, RealAllocator >::
 computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector)
 {
     for( IndexType i = this->getRows() - 1; i >= 0; i-- )
@@ -293,10 +274,9 @@ computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector)
 template< typename Real,
           typename Device,
           typename Index,
-          typename RealAllocator,
-          typename ValuesHolder >
+          typename RealAllocator >
 void
-Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::
+Matrix< Real, Device, Index, RealAllocator >::
 copyFromHostToCuda( Matrix< Real, Devices::Host, Index >& matrix )
 {
     this->numberOfColors = matrix.getNumberOfColors();
diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h
index 2a6429df5..cd1beda9c 100644
--- a/src/TNL/Matrices/MatrixView.h
+++ b/src/TNL/Matrices/MatrixView.h
@@ -95,11 +95,11 @@ public:
     * \brief Shallow copy of the matrix view.
     *
     * @param view
-    * @return 
+    * @return
     */
    __cuda_callable__
    MatrixView& operator=( const MatrixView& view );
-   
+
    // TODO: parallelize and optimize for sparse matrices
    template< typename Matrix >
    bool operator == ( const Matrix& matrix ) const;
diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp
index 360478d05..363fec208 100644
--- a/src/TNL/Matrices/MatrixView.hpp
+++ b/src/TNL/Matrices/MatrixView.hpp
@@ -35,7 +35,7 @@ template< typename Real,
           typename Index >
 __cuda_callable__
 MatrixView< Real, Device, Index >::
-MatrixView( const IndexType rows_, 
+MatrixView( const IndexType rows_,
             const IndexType columns_,
             const ValuesView& values_ )
  : rows( rows_ ), columns( columns_ ), values( values_ )
@@ -45,7 +45,9 @@ MatrixView( const IndexType rows_,
 template< typename Real,
           typename Device,
           typename Index >
-void MatrixView< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const
+void
+MatrixView< Real, Device, Index >::
+getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const
 {
    rowLengths.setSize( this->getRows() );
    getCompressedRowLengths( rowLengths.getView() );
@@ -54,7 +56,9 @@ void MatrixView< Real, Device, Index >::getCompressedRowLengths( CompressedRowLe
 template< typename Real,
           typename Device,
           typename Index >
-void MatrixView< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
+void
+MatrixView< Real, Device, Index >::
+getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
 {
    TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
    for( IndexType row = 0; row < this->getRows(); row++ )
@@ -64,7 +68,9 @@ void MatrixView< Real, Device, Index >::getCompressedRowLengths( CompressedRowLe
 template< typename Real,
           typename Device,
           typename Index >
-Index MatrixView< Real, Device, Index >::getAllocatedElementsCount() const
+Index
+MatrixView< Real, Device, Index >::
+getAllocatedElementsCount() const
 {
    return this->values.getSize();
 }
@@ -72,7 +78,9 @@ Index MatrixView< Real, Device, Index >::getAllocatedElementsCount() const
 template< typename Real,
           typename Device,
           typename Index >
-Index MatrixView< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const
+Index
+MatrixView< Real, Device, Index >::
+getNumberOfNonzeroMatrixElements() const
 {
    const auto values_view = this->values.getConstView();
    auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
@@ -109,12 +117,12 @@ getValues() const
 {
    return this->values;
 }
-   
+
 template< typename Real,
           typename Device,
           typename Index >
 __cuda_callable__
-typename MatrixView< Real, Device, Index >::ValuesView& 
+typename MatrixView< Real, Device, Index >::ValuesView&
 MatrixView< Real, Device, Index >::
 getValues()
 {
@@ -124,7 +132,7 @@ template< typename Real,
           typename Device,
           typename Index >
 __cuda_callable__
-MatrixView< Real, Device, Index >& 
+MatrixView< Real, Device, Index >&
 MatrixView< Real, Device, Index >::
 operator=( const MatrixView& view )
 {
@@ -202,7 +210,7 @@ getNumberOfColors() const
 template< typename Real,
           typename Device,
           typename Index >
-void 
+void
 MatrixView< Real, Device, Index >::
 computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector)
 {
diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp
index 659d6d4eb..e8eb66751 100644
--- a/src/TNL/Matrices/Multidiagonal.hpp
+++ b/src/TNL/Matrices/Multidiagonal.hpp
@@ -277,7 +277,6 @@ Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator
 reset()
 {
    Matrix< Real, Device, Index >::reset();
-   this->values.reset();
 }
 
 template< typename Real,
@@ -488,7 +487,7 @@ template< typename Real,
           typename IndexAllocator >
 template< typename Vector >
 __cuda_callable__
-typename Vector::RealType 
+typename Vector::RealType
 Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
 rowVectorProduct( const IndexType row, const Vector& vector ) const
 {
@@ -503,7 +502,7 @@ template< typename Real,
           typename IndexAllocator >
    template< typename InVector,
              typename OutVector >
-void 
+void
 Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
 vectorProduct( const InVector& inVector, OutVector& outVector ) const
 {
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index ffcccfade..49e3b45bb 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -31,6 +31,8 @@ template< typename Real,
 class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 {
    public:
+      static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
+      static constexpr bool isBinary() { return std::is_same< Real, bool >::value; };
 
       using RealType = Real;
       template< typename Device_, typename Index_, typename IndexAllocator_ >
@@ -43,24 +45,23 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       using IndexType = Index;
       using RealAllocatorType = RealAllocator;
       using IndexAllocatorType = IndexAllocator;
+      using BaseType = Matrix< Real, Device, Index, RealAllocator >;
       using RowsCapacitiesType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
       using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >;
       using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
-      using ValuesHolderType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesHolderType;
-      using ValuesViewType = typename ValuesHolderType::ViewType;
+      using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType;
+      using ValuesViewType = typename ValuesVectorType::ViewType;
       using ColumnsIndexesVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
       using ColumnsIndexesViewType = typename ColumnsIndexesVectorType::ViewType;
       using ViewType = SparseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >;
       using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
-      using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType >;
+      using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType, isBinary() >;
 
       // TODO: remove this - it is here only for compatibility with original matrix implementation
       typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
       typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
       typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
 
-      static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
-
       SparseMatrix( const RealAllocatorType& realAllocator = RealAllocatorType(),
                     const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
 
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index d3641e030..60f4695f0 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -27,7 +27,7 @@ template< typename Real,
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 SparseMatrix( const RealAllocatorType& realAllocator,
               const IndexAllocatorType& indexAllocator )
-   : Matrix< Real, Device, Index, RealAllocator >( realAllocator ), columnIndexes( indexAllocator )
+   : BaseType( realAllocator ), columnIndexes( indexAllocator )
 {
 }
 
@@ -69,7 +69,7 @@ SparseMatrix( const IndexType rows,
               const IndexType columns,
               const RealAllocatorType& realAllocator,
               const IndexAllocatorType& indexAllocator )
-: Matrix< Real, Device, Index, RealAllocator >( rows, columns, realAllocator ), columnIndexes( indexAllocator )
+: BaseType( rows, columns, realAllocator ), columnIndexes( indexAllocator )
 {
 }
 
@@ -162,8 +162,11 @@ setCompressedRowLengths( const RowsCapacitiesVector& rowsCapacities )
       thisRowsCapacities = rowsCapacities;
       this->segments.setSegmentsSizes( thisRowsCapacities );
    }
-   this->values.setSize( this->segments.getStorageSize() );
-   this->values = ( RealType ) 0;
+   if( ! isBinary() )
+   {
+      this->values.setSize( this->segments.getStorageSize() );
+      this->values = ( RealType ) 0;
+   }
    this->columnIndexes.setSize( this->segments.getStorageSize() );
    this->columnIndexes = this->getPaddingIndex();
    this->view = this->getView();
@@ -196,7 +199,7 @@ void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 setLike( const Matrix_& matrix )
 {
-   Matrix< Real, Device, Index, RealAllocator >::setLike( matrix );
+   BaseType::setLike( matrix );
 }
 
 template< typename Real,
@@ -224,7 +227,7 @@ void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 reset()
 {
-   Matrix< Real, Device, Index >::reset();
+   BaseType::reset();
 }
 
 template< typename Real,
@@ -761,7 +764,7 @@ operator=( const RHSMatrix& matrix )
                TNL_ASSERT_LT( bufferIdx, bufferSize, "" );
                inValue = thisValuesBuffer_view[ bufferIdx ];
             }
-            //std::cerr << "rowIdx = " << rowIdx << " localIdx = " << localIdx << " bufferLocalIdx = " << bufferLocalIdx 
+            //std::cerr << "rowIdx = " << rowIdx << " localIdx = " << localIdx << " bufferLocalIdx = " << bufferLocalIdx
             //          << " inValue = " << inValue << " bufferIdx = " << bufferIdx << std::endl;
             rowLocalIndexes_view[ rowIdx ] = bufferLocalIdx;
             if( inValue == 0.0 )
diff --git a/src/TNL/Matrices/SparseMatrixRowView.h b/src/TNL/Matrices/SparseMatrixRowView.h
index 19445f531..8906ab5ae 100644
--- a/src/TNL/Matrices/SparseMatrixRowView.h
+++ b/src/TNL/Matrices/SparseMatrixRowView.h
@@ -1,4 +1,4 @@
-/***************************************************************************
+ /***************************************************************************
                           SparseMatrixRowView.h -  description
                              -------------------
     begin                : Dec 28, 2019
@@ -15,7 +15,8 @@ namespace TNL {
 
 template< typename SegmentView,
           typename ValuesView,
-          typename ColumnsIndexesView >
+          typename ColumnsIndexesView,
+          bool isBinary_ >
 class SparseMatrixRowView
 {
    public:
@@ -26,6 +27,8 @@ class SparseMatrixRowView
       using ValuesViewType = ValuesView;
       using ColumnsIndexesViewType = ColumnsIndexesView;
 
+      static constexpr bool isBinary() { return isBinary_; };
+
       __cuda_callable__
       SparseMatrixRowView( const SegmentViewType& segmentView,
                            const ValuesViewType& values,
@@ -39,7 +42,7 @@ class SparseMatrixRowView
 
       __cuda_callable__
       IndexType& getColumnIndex( const IndexType localIdx );
-      
+
       __cuda_callable__
       const RealType& getValue( const IndexType localIdx ) const;
 
diff --git a/src/TNL/Matrices/SparseMatrixRowView.hpp b/src/TNL/Matrices/SparseMatrixRowView.hpp
index 70dac874e..ab5b4622b 100644
--- a/src/TNL/Matrices/SparseMatrixRowView.hpp
+++ b/src/TNL/Matrices/SparseMatrixRowView.hpp
@@ -17,9 +17,10 @@ namespace TNL {
 
 template< typename SegmentView,
           typename ValuesView,
-          typename ColumnsIndexesView >
+          typename ColumnsIndexesView,
+          bool isBinary_ >
 __cuda_callable__
-SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >::
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
 SparseMatrixRowView( const SegmentViewType& segmentView,
                      const ValuesViewType& values,
                      const ColumnsIndexesViewType& columnIndexes )
@@ -29,9 +30,10 @@ SparseMatrixRowView( const SegmentViewType& segmentView,
 
 template< typename SegmentView,
           typename ValuesView,
-          typename ColumnsIndexesView >
+          typename ColumnsIndexesView,
+          bool isBinary_ >
 __cuda_callable__ auto
-SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >::
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
 getSize() const -> IndexType
 {
    return segmentView.getSize();
@@ -39,9 +41,10 @@ getSize() const -> IndexType
 
 template< typename SegmentView,
           typename ValuesView,
-          typename ColumnsIndexesView >
+          typename ColumnsIndexesView,
+          bool isBinary_ >
 __cuda_callable__ auto
-SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >::
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
 getColumnIndex( const IndexType localIdx ) const -> const IndexType&
 {
    TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
@@ -50,9 +53,10 @@ getColumnIndex( const IndexType localIdx ) const -> const IndexType&
 
 template< typename SegmentView,
           typename ValuesView,
-          typename ColumnsIndexesView >
+          typename ColumnsIndexesView,
+          bool isBinary_ >
 __cuda_callable__ auto
-SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >::
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
 getColumnIndex( const IndexType localIdx ) -> IndexType&
 {
    TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
@@ -61,9 +65,10 @@ getColumnIndex( const IndexType localIdx ) -> IndexType&
 
 template< typename SegmentView,
           typename ValuesView,
-          typename ColumnsIndexesView >
+          typename ColumnsIndexesView,
+          bool isBinary_ >
 __cuda_callable__ auto
-SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >::
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
 getValue( const IndexType localIdx ) const -> const RealType&
 {
    TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
@@ -72,9 +77,10 @@ getValue( const IndexType localIdx ) const -> const RealType&
 
 template< typename SegmentView,
           typename ValuesView,
-          typename ColumnsIndexesView >
+          typename ColumnsIndexesView,
+          bool isBinary_ >
 __cuda_callable__ auto
-SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >::
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
 getValue( const IndexType localIdx ) -> RealType&
 {
    TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
@@ -83,9 +89,10 @@ getValue( const IndexType localIdx ) -> RealType&
 
 template< typename SegmentView,
           typename ValuesView,
-          typename ColumnsIndexesView >
-__cuda_callable__ void 
-SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >::
+          typename ColumnsIndexesView,
+          bool isBinary_ >
+__cuda_callable__ void
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
 setElement( const IndexType localIdx,
             const IndexType column,
             const RealType& value )
@@ -93,7 +100,8 @@ setElement( const IndexType localIdx,
    TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
    const IndexType globalIdx = segmentView.getGlobalIndex( localIdx );
    columnIndexes[ globalIdx ] = column;
-   values[ globalIdx ] = value;
+   if( ! isBinary() )
+      values[ globalIdx ] = value;
 }
 
 
diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h
index 7168e1e8e..d8c6eb63f 100644
--- a/src/TNL/Matrices/SparseMatrixView.h
+++ b/src/TNL/Matrices/SparseMatrixView.h
@@ -27,6 +27,8 @@ template< typename Real,
 class SparseMatrixView : public MatrixView< Real, Device, Index >
 {
    public:
+      static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
+      static constexpr bool isBinary() { return std::is_same< Real, bool >::value; };
 
       using RealType = Real;
       template< typename Device_, typename Index_ >
@@ -35,20 +37,20 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
       using SegmentViewType = typename SegmentsViewType::SegmentViewType;
       using DeviceType = Device;
       using IndexType = Index;
+      using BaseType = MatrixView< Real, Device, Index >;
       using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >;
       using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
-      using ValuesViewType = typename MatrixView< Real, Device, Index >::ValuesView;
+      using ValuesViewType = typename BaseType::ValuesView;
       using ColumnsIndexesViewType = Containers::VectorView< IndexType, DeviceType, IndexType >;
       using ViewType = SparseMatrixView< typename std::remove_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
       using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
-      using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType >;
+      using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType, isBinary() >;
 
       // TODO: remove this - it is here only for compatibility with original matrix implementation
       typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
       typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
       typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
 
-      static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
 
       __cuda_callable__
       SparseMatrixView();
diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp
index 2ccdc4838..3ddabc985 100644
--- a/src/TNL/Matrices/Tridiagonal.hpp
+++ b/src/TNL/Matrices/Tridiagonal.hpp
@@ -208,7 +208,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 reset()
 {
    Matrix< Real, Device, Index >::reset();
-   this->values.reset();
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/details/ValuesHolder.h b/src/TNL/Matrices/details/ValuesHolder.h
deleted file mode 100644
index c76f0d344..000000000
--- a/src/TNL/Matrices/details/ValuesHolder.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/***************************************************************************
-                          ValuesHolder.h  -  description
-                             -------------------
-    begin                : Jan 27, 2020
-    copyright            : (C) 2020 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-namespace TNL {
-   namespace Matrices {
-      namespace details {
-
-
-template< typename Real,
-          typename Device,
-          typename Index >
-struct ValuesHolderView
-: public Containers::VectorView< Real, Device, Index >
-{
-   using RealType = Real;
-   using DeviceType = Device;
-   using IndexType = Index;
-
-   using Containers::VectorView< Real, Device, Index >::VectorView;
-   using Containers::VectorView< Real, Device, Index >::operator=;
-   /*__cuda_callable__
-   ValuesHolderView() = default;
-
-   __cuda_callable__
-   explicit ValuesHolderView( const ValuesHolderView& ) = default;
-
-   __cuda_callable__
-   ValuesHolderView( ValuesHolderView&& ) = default;*/
-
-};
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename Allocator >
-struct ValuesHolder
-: public Containers::Vector< Real, Device, Index, Allocator >
-{
-   using RealType = Real;
-   using DeviceType = Device;
-   using IndexType = Index;
-   using AllocatorType = Allocator;
-   using ViewType = ValuesHolderView< Real, Device, Index >;
-
-   using Containers::Vector< Real, Device, Index, Allocator >::Vector;
-   using Containers::Vector< Real, Device, Index, Allocator >::operator=;
-   /*ValuesHolder() = default;
-
-   explicit ValuesHolder( const ValuesHolder& ) = default;
-
-   explicit ValuesHolder( const ValuesHolder& vector, const AllocatorType& allocator );
-
-   ValuesHolder( ValuesHolder&& ) = default;*/
-
-};
-
-template< typename Device,
-          typename Index >
-class BooleanValuesHolder
-{
-   public:
-
-      using RealType = bool;
-      using DeviceType = Device;
-      using IndexType = Index;
-      using ViewType = BooleanValuesHolder;
-
-      BooleanValuesHolder()
-      : size( 0 ){};
-
-      BooleanValuesHolder( const IndexType& size )
-      : size( size ){};
-
-      void setSize( const IndexType& size ) { this->size = size; };
-
-      __cuda_callable__
-      IndexType getSize() const { return this->size; };
-
-      __cuda_callable__
-      bool operator[]( const IndexType& i ) const { return true; };
-
-   protected:
-
-      IndexType size;
-};
-
-/**
- * \brief Serialization of values holder into binary files.
- */
-template< typename Device, typename Index, typename Allocator >
-File& operator<<( File& file, const ValuesHolder< bool, Device, Index, Allocator >& holder ) {
-   file << holder.getSize();
-   return file; };
-
-template< typename Device, typename Index, typename Allocator >
-File& operator<<( File&& file, const ValuesHolder< bool, Device, Index, Allocator >& holder ) {
-   file << holder.getSize();
-   return file; };
-
-/**
- * \brief Deserialization of values holder from binary files.
- */
-template< typename Device, typename Index, typename Allocator >
-File& operator>>( File& file, ValuesHolder< bool, Device, Index, Allocator >& holder ) {
-   Index size;
-   file >> size;
-   holder.setSize( size );
-   return file; };
-
-template< typename Device, typename Index, typename Allocator >
-File& operator>>( File&& file, ValuesHolder< bool, Device, Index, Allocator >& holder ) {
-   Index size;
-   file >> size;
-   holder.setSize( size );
-   return file; };
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename RealAllocator >
-struct ValuesHolderSetter
-{
-   using type = ValuesHolder< Real, Device, Index, RealAllocator >;
-};
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename RealAllocator >
-struct SparseMatrixValuesHolderSetter
-{
-   using type = ValuesHolder< Real, Device, Index, RealAllocator >;
-};
-
-template< typename Device,
-          typename Index,
-          typename RealAllocator >
-struct SparseMatrixValuesHolderSetter< bool, Device, Index, RealAllocator >
-{
-   using type = BooleanValuesHolder< Device, Index >;
-};
-
-      } //namespace details
-   } //namepsace Matrices
-} //namespace TNL
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.cpp b/src/UnitTests/Matrices/BinarySparseMatrixTest.cpp
new file mode 100644
index 000000000..ea7b8d3c9
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          BinarySparseMatrixTest.cpp -  description
+                             -------------------
+    begin                : Jan 30, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "BinarySparseMatrixTest.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.cu b/src/UnitTests/Matrices/BinarySparseMatrixTest.cu
new file mode 100644
index 000000000..916f14360
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          BinarySparseMatrixTest.cu -  description
+                             -------------------
+    begin                : Jan 30, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "BinarySparseMatrixTest.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.h b/src/UnitTests/Matrices/BinarySparseMatrixTest.h
new file mode 100644
index 000000000..0abba5b86
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.h
@@ -0,0 +1,117 @@
+/***************************************************************************
+                          BinarySparseMatrixTest.h -  description
+                             -------------------
+    begin                : Jan 30, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Matrices/SparseMatrix.h>
+
+
+#include "BinarySparseMatrixTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+// test fixture for typed tests
+template< typename Matrix >
+class CSRMatrixTest : public ::testing::Test
+{
+protected:
+   using CSRMatrixType = Matrix;
+};
+
+// types for which MatrixTest is instantiated
+using CSRMatrixTypes = ::testing::Types
+<
+    TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >
+#ifdef HAVE_CUDA
+   ,TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+#endif
+>;
+
+TYPED_TEST_SUITE( CSRMatrixTest, CSRMatrixTypes);
+
+TYPED_TEST( CSRMatrixTest, setDimensionsTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SetDimensions< CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, setCompressedRowLengthsTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SetCompressedRowLengths< CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, setLikeTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SetLike< CSRMatrixType, CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, resetTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_Reset< CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, getRowTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_GetRow< CSRMatrixType >();
+}
+
+
+TYPED_TEST( CSRMatrixTest, setElementTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SetElement< CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, vectorProductTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_VectorProduct< CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, rowsReduction )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_RowsReduction< CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, saveAndLoadTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SaveAndLoad< CSRMatrixType >( "test_BinarySparseMatrixTest" );
+}
+
+TYPED_TEST( CSRMatrixTest, printTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_Print< CSRMatrixType >();
+}
+
+#endif
+
+#include "../main.h"
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
new file mode 100644
index 000000000..5e969e976
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
@@ -0,0 +1,1573 @@
+/***************************************************************************
+                          SparseMatrixTest_impl.h -  description
+                             -------------------
+    begin                : Nov 22, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Math.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <iostream>
+
+// Temporary, until test_OperatorEquals doesn't work for all formats.
+#include <TNL/Matrices/Legacy/ChunkedEllpack.h>
+#include <TNL/Matrices/Legacy/AdEllpack.h>
+#include <TNL/Matrices/Legacy/BiEllpack.h>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+template< typename MatrixHostFloat, typename MatrixHostInt >
+void host_test_GetType()
+{
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+}
+
+template< typename MatrixCudaFloat, typename MatrixCudaInt >
+void cuda_test_GetType()
+{
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+}
+
+template< typename Matrix >
+void test_SetDimensions()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+    const IndexType rows = 9;
+    const IndexType cols = 8;
+
+    Matrix m;
+    m.setDimensions( rows, cols );
+
+    EXPECT_EQ( m.getRows(), 9 );
+    EXPECT_EQ( m.getColumns(), 8 );
+}
+
+template< typename Matrix >
+void test_SetCompressedRowLengths()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+    const IndexType rows = 10;
+    const IndexType cols = 11;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( rows );
+    rowLengths.setValue( 3 );
+
+    IndexType rowLength = 1;
+    for( IndexType i = 2; i < rows; i++ )
+        rowLengths.setElement( i, rowLength++ );
+
+    m.setCompressedRowLengths( rowLengths );
+
+    // Insert values into the rows.
+
+    for( IndexType i = 0; i < 3; i++ )      // 0th row
+        m.setElement( 0, i, 1 );
+
+    for( IndexType i = 0; i < 3; i++ )      // 1st row
+        m.setElement( 1, i, 1 );
+
+    for( IndexType i = 0; i < 1; i++ )      // 2nd row
+        m.setElement( 2, i, 1 );
+
+    for( IndexType i = 0; i < 2; i++ )      // 3rd row
+        m.setElement( 3, i, 1 );
+
+    for( IndexType i = 0; i < 3; i++ )      // 4th row
+        m.setElement( 4, i, 1 );
+
+    for( IndexType i = 0; i < 4; i++ )      // 5th row
+        m.setElement( 5, i, 1 );
+
+    for( IndexType i = 0; i < 5; i++ )      // 6th row
+        m.setElement( 6, i, 1 );
+
+    for( IndexType i = 0; i < 6; i++ )      // 7th row
+        m.setElement( 7, i, 1 );
+
+    for( IndexType i = 0; i < 7; i++ )      // 8th row
+        m.setElement( 8, i, 1 );
+
+    for( IndexType i = 0; i < 8; i++ )      // 9th row
+        m.setElement( 9, i, 1 );
+
+   rowLengths = 0;
+   m.getCompressedRowLengths( rowLengths );
+   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 };
+   EXPECT_EQ( rowLengths, correctRowLengths );
+}
+
+template< typename Matrix1, typename Matrix2 >
+void test_SetLike()
+{
+    using RealType = typename Matrix1::RealType;
+    using DeviceType = typename Matrix1::DeviceType;
+    using IndexType = typename Matrix1::IndexType;
+
+    const IndexType rows = 8;
+    const IndexType cols = 7;
+
+    Matrix1 m1;
+    m1.reset();
+    m1.setDimensions( rows + 1, cols + 2 );
+
+    Matrix2 m2;
+    m2.reset();
+    m2.setDimensions( rows, cols );
+
+    m1.setLike( m2 );
+
+
+    EXPECT_EQ( m1.getRows(), m2.getRows() );
+    EXPECT_EQ( m1.getColumns(), m2.getColumns() );
+}
+
+template< typename Matrix >
+void test_GetNumberOfNonzeroMatrixElements()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  2  0  3  0  4  0  0  0  \
+    *    |  5  6  7  0  0  0  0  0  0  0  |
+    *    |  8  9 10 11 12 13 14 15  0  0  |
+    *    | 16 17  0  0  0  0  0  0  0  0  |
+    *    | 18  0  0  0  0  0  0  0  0  0  |
+    *    | 19  0  0  0  0  0  0  0  0  0  |
+    *    | 20  0  0  0  0  0  0  0  0  0  |
+    *    | 21  0  0  0  0  0  0  0  0  0  |
+    *    | 22 23 24 25 26 27 28 29 30 31  |
+    *    \ 32 33 34 35 36 37 38 39 40 41 /
+    */
+
+   const IndexType rows = 10;
+   const IndexType cols = 10;
+
+   Matrix m;
+   m.reset();
+
+   m.setDimensions( rows, cols );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setElement( 0, 4 );
+   rowLengths.setElement( 1, 3 );
+   rowLengths.setElement( 2, 8 );
+   rowLengths.setElement( 3, 2 );
+   for( IndexType i = 4; i < rows - 2; i++ )
+   {
+      rowLengths.setElement( i, 1 );
+   }
+   rowLengths.setElement( 8, 10 );
+   rowLengths.setElement( 9, 10 );
+   m.setCompressedRowLengths( rowLengths );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < 4; i++ )
+      m.setElement( 0, 2 * i, value++ );
+
+   for( IndexType i = 0; i < 3; i++ )
+      m.setElement( 1, i, value++ );
+
+   for( IndexType i = 0; i < 8; i++ )
+      m.setElement( 2, i, value++ );
+
+   for( IndexType i = 0; i < 2; i++ )
+      m.setElement( 3, i, value++ );
+
+   for( IndexType i = 4; i < 8; i++ )
+      m.setElement( i, 0, value++ );
+
+   for( IndexType j = 8; j < rows; j++)
+   {
+      for( IndexType i = 0; i < cols; i++ )
+         m.setElement( j, i, value++ );
+   }
+
+   EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 );
+}
+
+template< typename Matrix >
+void test_Reset()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+/*
+ * Sets up the following 5x4 sparse matrix:
+ *
+ *    /  0  0  0  0 \
+ *    |  0  0  0  0 |
+ *    |  0  0  0  0 |
+ *    |  0  0  0  0 |
+ *    \  0  0  0  0 /
+ */
+
+    const IndexType rows = 5;
+    const IndexType cols = 4;
+
+    Matrix m;
+    m.setDimensions( rows, cols );
+
+    m.reset();
+
+
+    EXPECT_EQ( m.getRows(), 0 );
+    EXPECT_EQ( m.getColumns(), 0 );
+}
+
+template< typename Matrix >
+void test_GetRow()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+/*
+ * Sets up the following 10x10 sparse matrix:
+ *
+ *    /  1  0  2  0  3  0  4  0  0  0  \
+ *    |  5  6  7  0  0  0  0  0  0  0  |
+ *    |  8  9 10 11 12 13 14 15  0  0  |
+ *    | 16 17  0  0  0  0  0  0  0  0  |
+ *    | 18  0  0  0  0  0  0  0  0  0  |
+ *    | 19  0  0  0  0  0  0  0  0  0  |
+ *    | 20  0  0  0  0  0  0  0  0  0  |
+ *    | 21  0  0  0  0  0  0  0  0  0  |
+ *    | 22 23 24 25 26 27 28 29 30 31  |
+ *    \ 32 33 34 35 36 37 38 39 40 41 /
+ */
+
+    const IndexType rows = 10;
+    const IndexType cols = 10;
+
+    Matrix m( rows, cols );
+
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( rows );
+    rowLengths.setElement( 0, 4 );
+    rowLengths.setElement( 1, 3 );
+    rowLengths.setElement( 2, 8 );
+    rowLengths.setElement( 3, 2 );
+    for( IndexType i = 4; i < rows - 2; i++ )
+    {
+        rowLengths.setElement( i, 1 );
+    }
+    rowLengths.setElement( 8, 10 );
+    rowLengths.setElement( 9, 10 );
+    m.setCompressedRowLengths( rowLengths );
+
+    /*RealType value = 1;
+    for( IndexType i = 0; i < 4; i++ )
+        m.setElement( 0, 2 * i, value++ );
+
+    for( IndexType i = 0; i < 3; i++ )
+        m.setElement( 1, i, value++ );
+
+    for( IndexType i = 0; i < 8; i++ )
+        m.setElement( 2, i, value++ );
+
+    for( IndexType i = 0; i < 2; i++ )
+        m.setElement( 3, i, value++ );
+
+    for( IndexType i = 4; i < 8; i++ )
+        m.setElement( i, 0, value++ );
+
+    for( IndexType j = 8; j < rows; j++)
+    {
+        for( IndexType i = 0; i < cols; i++ )
+            m.setElement( j, i, value++ );
+    }*/
+    auto matrixView = m.getView();
+    auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
+       auto row = matrixView.getRow( rowIdx );
+       switch( rowIdx )
+       {
+          case 0:
+            for( IndexType i = 0; i < 4; i++ )
+               row.setElement( i, 2 * i, 1 );
+            break;
+         case 1:
+            for( IndexType i = 0; i < 3; i++ )
+               row.setElement( i, i, 1 );
+            break;
+         case 2:
+            for( IndexType i = 0; i < 8; i++ )
+               row.setElement( i, i, 1 );
+            break;
+         case 3:
+            for( IndexType i = 0; i < 2; i++ )
+               row.setElement( i, i, 1 );
+            break;
+         case 4:
+            row.setElement( 0, 0, 1 );
+            break;
+         case 5:
+            row.setElement( 0, 0, 1 );
+            break;
+         case 6:
+            row.setElement( 0, 0, 1 );
+            break;
+         case 7:
+            row.setElement( 0, 0, 1 );
+            break;
+         case 8:
+             for( IndexType i = 0; i < rows; i++ )
+                row.setElement( i, i, 1 );
+             break;
+         case 9:
+             for( IndexType i = 0; i < rows; i++ )
+                row.setElement( i, i, 1 );
+             break;
+       }
+    };
+    TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f );
+
+    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  2 );
+    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 4 ),  3 );
+    EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 6 ),  4 );
+    EXPECT_EQ( m.getElement( 0, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ),  5 );
+    EXPECT_EQ( m.getElement( 1, 1 ),  6 );
+    EXPECT_EQ( m.getElement( 1, 2 ),  7 );
+    EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ),  8 );
+    EXPECT_EQ( m.getElement( 2, 1 ),  9 );
+    EXPECT_EQ( m.getElement( 2, 2 ), 10 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 11 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 12 );
+    EXPECT_EQ( m.getElement( 2, 5 ), 13 );
+    EXPECT_EQ( m.getElement( 2, 6 ), 14 );
+    EXPECT_EQ( m.getElement( 2, 7 ), 15 );
+    EXPECT_EQ( m.getElement( 2, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 2, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+    EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+    EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 18 );
+    EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ), 19 );
+    EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 6, 0 ), 20 );
+    EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 7, 0 ), 21 );
+    EXPECT_EQ( m.getElement( 7, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 8, 0 ), 22 );
+    EXPECT_EQ( m.getElement( 8, 1 ), 23 );
+    EXPECT_EQ( m.getElement( 8, 2 ), 24 );
+    EXPECT_EQ( m.getElement( 8, 3 ), 25 );
+    EXPECT_EQ( m.getElement( 8, 4 ), 26 );
+    EXPECT_EQ( m.getElement( 8, 5 ), 27 );
+    EXPECT_EQ( m.getElement( 8, 6 ), 28 );
+    EXPECT_EQ( m.getElement( 8, 7 ), 29 );
+    EXPECT_EQ( m.getElement( 8, 8 ), 30 );
+    EXPECT_EQ( m.getElement( 8, 9 ), 31 );
+
+    EXPECT_EQ( m.getElement( 9, 0 ), 32 );
+    EXPECT_EQ( m.getElement( 9, 1 ), 33 );
+    EXPECT_EQ( m.getElement( 9, 2 ), 34 );
+    EXPECT_EQ( m.getElement( 9, 3 ), 35 );
+    EXPECT_EQ( m.getElement( 9, 4 ), 36 );
+    EXPECT_EQ( m.getElement( 9, 5 ), 37 );
+    EXPECT_EQ( m.getElement( 9, 6 ), 38 );
+    EXPECT_EQ( m.getElement( 9, 7 ), 39 );
+    EXPECT_EQ( m.getElement( 9, 8 ), 40 );
+    EXPECT_EQ( m.getElement( 9, 9 ), 41 );
+}
+
+
+template< typename Matrix >
+void test_SetElement()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+/*
+ * Sets up the following 10x10 sparse matrix:
+ *
+ *    /  1  0  2  0  3  0  4  0  0  0  \
+ *    |  5  6  7  0  0  0  0  0  0  0  |
+ *    |  8  9 10 11 12 13 14 15  0  0  |
+ *    | 16 17  0  0  0  0  0  0  0  0  |
+ *    | 18  0  0  0  0  0  0  0  0  0  |
+ *    | 19  0  0  0  0  0  0  0  0  0  |
+ *    | 20  0  0  0  0  0  0  0  0  0  |
+ *    | 21  0  0  0  0  0  0  0  0  0  |
+ *    | 22 23 24 25 26 27 28 29 30 31  |
+ *    \ 32 33 34 35 36 37 38 39 40 41 /
+ */
+
+    const IndexType rows = 10;
+    const IndexType cols = 10;
+
+    Matrix m;
+    m.reset();
+
+    m.setDimensions( rows, cols );
+
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( rows );
+    rowLengths.setElement( 0, 4 );
+    rowLengths.setElement( 1, 3 );
+    rowLengths.setElement( 2, 8 );
+    rowLengths.setElement( 3, 2 );
+    for( IndexType i = 4; i < rows - 2; i++ )
+    {
+        rowLengths.setElement( i, 1 );
+    }
+    rowLengths.setElement( 8, 10 );
+    rowLengths.setElement( 9, 10 );
+    m.setCompressedRowLengths( rowLengths );
+
+    for( IndexType i = 0; i < 4; i++ )
+        m.setElement( 0, 2 * i, 1 );
+
+    for( IndexType i = 0; i < 3; i++ )
+        m.setElement( 1, i, 1 );
+
+    for( IndexType i = 0; i < 8; i++ )
+        m.setElement( 2, i, 1 );
+
+    for( IndexType i = 0; i < 2; i++ )
+        m.setElement( 3, i, 1 );
+
+    for( IndexType i = 4; i < 8; i++ )
+        m.setElement( i, 0, 1 );
+
+    for( IndexType j = 8; j < rows; j++)
+    {
+        for( IndexType i = 0; i < cols; i++ )
+            m.setElement( j, i, 1 );
+    }
+
+    EXPECT_EQ( m.getElement( 0, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 0, 1 ), 0 );
+    EXPECT_EQ( m.getElement( 0, 2 ), 1 );
+    EXPECT_EQ( m.getElement( 0, 3 ), 0 );
+    EXPECT_EQ( m.getElement( 0, 4 ), 1 );
+    EXPECT_EQ( m.getElement( 0, 5 ), 0 );
+    EXPECT_EQ( m.getElement( 0, 6 ), 1 );
+    EXPECT_EQ( m.getElement( 0, 7 ), 0 );
+    EXPECT_EQ( m.getElement( 0, 8 ), 0 );
+    EXPECT_EQ( m.getElement( 0, 9 ), 0 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 1, 1 ), 1 );
+    EXPECT_EQ( m.getElement( 1, 2 ), 1 );
+    EXPECT_EQ( m.getElement( 1, 3 ), 0 );
+    EXPECT_EQ( m.getElement( 1, 4 ), 0 );
+    EXPECT_EQ( m.getElement( 1, 5 ), 0 );
+    EXPECT_EQ( m.getElement( 1, 6 ), 0 );
+    EXPECT_EQ( m.getElement( 1, 7 ), 0 );
+    EXPECT_EQ( m.getElement( 1, 8 ), 0 );
+    EXPECT_EQ( m.getElement( 1, 9 ), 0 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 2, 1 ), 1 );
+    EXPECT_EQ( m.getElement( 2, 2 ), 1 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 1 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 1 );
+    EXPECT_EQ( m.getElement( 2, 5 ), 1 );
+    EXPECT_EQ( m.getElement( 2, 6 ), 1 );
+    EXPECT_EQ( m.getElement( 2, 7 ), 1 );
+    EXPECT_EQ( m.getElement( 2, 8 ), 0 );
+    EXPECT_EQ( m.getElement( 2, 9 ), 0 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 3, 1 ), 1 );
+    EXPECT_EQ( m.getElement( 3, 2 ), 0 );
+    EXPECT_EQ( m.getElement( 3, 3 ), 0 );
+    EXPECT_EQ( m.getElement( 3, 4 ), 0 );
+    EXPECT_EQ( m.getElement( 3, 5 ), 0 );
+    EXPECT_EQ( m.getElement( 3, 6 ), 0 );
+    EXPECT_EQ( m.getElement( 3, 7 ), 0 );
+    EXPECT_EQ( m.getElement( 3, 8 ), 0 );
+    EXPECT_EQ( m.getElement( 3, 9 ), 0 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 4, 1 ), 0 );
+    EXPECT_EQ( m.getElement( 4, 2 ), 0 );
+    EXPECT_EQ( m.getElement( 4, 3 ), 0 );
+    EXPECT_EQ( m.getElement( 4, 4 ), 0 );
+    EXPECT_EQ( m.getElement( 4, 5 ), 0 );
+    EXPECT_EQ( m.getElement( 4, 6 ), 0 );
+    EXPECT_EQ( m.getElement( 4, 7 ), 0 );
+    EXPECT_EQ( m.getElement( 4, 8 ), 0 );
+    EXPECT_EQ( m.getElement( 4, 9 ), 0 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 5, 1 ), 0 );
+    EXPECT_EQ( m.getElement( 5, 2 ), 0 );
+    EXPECT_EQ( m.getElement( 5, 3 ), 0 );
+    EXPECT_EQ( m.getElement( 5, 4 ), 0 );
+    EXPECT_EQ( m.getElement( 5, 5 ), 0 );
+    EXPECT_EQ( m.getElement( 5, 6 ), 0 );
+    EXPECT_EQ( m.getElement( 5, 7 ), 0 );
+    EXPECT_EQ( m.getElement( 5, 8 ), 0 );
+    EXPECT_EQ( m.getElement( 5, 9 ), 0 );
+
+    EXPECT_EQ( m.getElement( 6, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 6, 1 ), 0 );
+    EXPECT_EQ( m.getElement( 6, 2 ), 0 );
+    EXPECT_EQ( m.getElement( 6, 3 ), 0 );
+    EXPECT_EQ( m.getElement( 6, 4 ), 0 );
+    EXPECT_EQ( m.getElement( 6, 5 ), 0 );
+    EXPECT_EQ( m.getElement( 6, 6 ), 0 );
+    EXPECT_EQ( m.getElement( 6, 7 ), 0 );
+    EXPECT_EQ( m.getElement( 6, 8 ), 0 );
+    EXPECT_EQ( m.getElement( 6, 9 ), 0 );
+
+    EXPECT_EQ( m.getElement( 7, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 7, 1 ), 0 );
+    EXPECT_EQ( m.getElement( 7, 2 ), 0 );
+    EXPECT_EQ( m.getElement( 7, 3 ), 0 );
+    EXPECT_EQ( m.getElement( 7, 4 ), 0 );
+    EXPECT_EQ( m.getElement( 7, 5 ), 0 );
+    EXPECT_EQ( m.getElement( 7, 6 ), 0 );
+    EXPECT_EQ( m.getElement( 7, 7 ), 0 );
+    EXPECT_EQ( m.getElement( 7, 8 ), 0 );
+    EXPECT_EQ( m.getElement( 7, 9 ), 0 );
+
+    EXPECT_EQ( m.getElement( 8, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 8, 1 ), 1 );
+    EXPECT_EQ( m.getElement( 8, 2 ), 1 );
+    EXPECT_EQ( m.getElement( 8, 3 ), 1 );
+    EXPECT_EQ( m.getElement( 8, 4 ), 1 );
+    EXPECT_EQ( m.getElement( 8, 5 ), 1 );
+    EXPECT_EQ( m.getElement( 8, 6 ), 1 );
+    EXPECT_EQ( m.getElement( 8, 7 ), 1 );
+    EXPECT_EQ( m.getElement( 8, 8 ), 1 );
+    EXPECT_EQ( m.getElement( 8, 9 ), 1 );
+
+    EXPECT_EQ( m.getElement( 9, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 9, 1 ), 1 );
+    EXPECT_EQ( m.getElement( 9, 2 ), 1 );
+    EXPECT_EQ( m.getElement( 9, 3 ), 1 );
+    EXPECT_EQ( m.getElement( 9, 4 ), 1 );
+    EXPECT_EQ( m.getElement( 9, 5 ), 1 );
+    EXPECT_EQ( m.getElement( 9, 6 ), 1 );
+    EXPECT_EQ( m.getElement( 9, 7 ), 1 );
+    EXPECT_EQ( m.getElement( 9, 8 ), 1 );
+    EXPECT_EQ( m.getElement( 9, 9 ), 1 );
+}
+
+template< typename Matrix >
+void test_VectorProduct()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+    using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
+
+/*
+ * Sets up the following 4x4 sparse matrix:
+ *
+ *    /  1  0  0  0 \
+ *    |  0  2  0  3 |
+ *    |  0  4  0  0 |
+ *    \  0  0  5  0 /
+ */
+
+    const IndexType m_rows_1 = 4;
+    const IndexType m_cols_1 = 4;
+
+    Matrix m_1;
+    m_1.reset();
+    m_1.setDimensions( m_rows_1, m_cols_1 );
+    typename Matrix::CompressedRowLengthsVector rowLengths_1;
+    rowLengths_1.setSize( m_rows_1 );
+    rowLengths_1.setElement( 0, 1 );
+    rowLengths_1.setElement( 1, 2 );
+    rowLengths_1.setElement( 2, 1 );
+    rowLengths_1.setElement( 3, 1 );
+    m_1.setCompressedRowLengths( rowLengths_1 );
+
+    m_1.setElement( 0, 0, 1 );      // 0th row
+
+    m_1.setElement( 1, 1, 1 );      // 1st row
+    m_1.setElement( 1, 3, 1 );
+
+    m_1.setElement( 2, 1, 1 );      // 2nd row
+
+    m_1.setElement( 3, 2, 1 );      // 3rd row
+
+    VectorType inVector_1;
+    inVector_1.setSize( m_cols_1 );
+    for( IndexType i = 0; i < inVector_1.getSize(); i++ )
+        inVector_1.setElement( i, 2 );
+
+    VectorType outVector_1;
+    outVector_1.setSize( m_rows_1 );
+    for( IndexType j = 0; j < outVector_1.getSize(); j++ )
+        outVector_1.setElement( j, 0 );
+
+
+    m_1.vectorProduct( inVector_1, outVector_1 );
+
+
+    EXPECT_EQ( outVector_1.getElement( 0 ),  2 );
+    EXPECT_EQ( outVector_1.getElement( 1 ), 10 );
+    EXPECT_EQ( outVector_1.getElement( 2 ),  8 );
+    EXPECT_EQ( outVector_1.getElement( 3 ), 10 );
+
+
+/*
+ * Sets up the following 4x4 sparse matrix:
+ *
+ *    /  1  2  3  0 \
+ *    |  0  0  0  4 |
+ *    |  5  6  7  0 |
+ *    \  0  8  0  0 /
+ */
+
+    const IndexType m_rows_2 = 4;
+    const IndexType m_cols_2 = 4;
+
+    Matrix m_2;
+    m_2.reset();
+    m_2.setDimensions( m_rows_2, m_cols_2 );
+    typename Matrix::CompressedRowLengthsVector rowLengths_2;
+    rowLengths_2.setSize( m_rows_2 );
+    rowLengths_2.setValue( 3 );
+    rowLengths_2.setElement( 1, 1 );
+    rowLengths_2.setElement( 3, 1 );
+    m_2.setCompressedRowLengths( rowLengths_2 );
+
+    for( IndexType i = 0; i < 3; i++ )   // 0th row
+        m_2.setElement( 0, i, 1 );
+
+    m_2.setElement( 1, 3, 1 );      // 1st row
+
+    for( IndexType i = 0; i < 3; i++ )   // 2nd row
+        m_2.setElement( 2, i, 1 );
+
+    for( IndexType i = 1; i < 2; i++ )       // 3rd row
+        m_2.setElement( 3, i, 1 );
+
+    VectorType inVector_2;
+    inVector_2.setSize( m_cols_2 );
+    for( IndexType i = 0; i < inVector_2.getSize(); i++ )
+        inVector_2.setElement( i, 2 );
+
+    VectorType outVector_2;
+    outVector_2.setSize( m_rows_2 );
+    for( IndexType j = 0; j < outVector_2.getSize(); j++ )
+        outVector_2.setElement( j, 0 );
+
+
+    m_2.vectorProduct( inVector_2, outVector_2 );
+
+
+    EXPECT_EQ( outVector_2.getElement( 0 ), 1 );
+    EXPECT_EQ( outVector_2.getElement( 1 ), 1 );
+    EXPECT_EQ( outVector_2.getElement( 2 ), 1 );
+    EXPECT_EQ( outVector_2.getElement( 3 ), 1 );
+
+
+/*
+ * Sets up the following 4x4 sparse matrix:
+ *
+ *    /  1  2  3  0 \
+ *    |  0  4  5  6 |
+ *    |  7  8  9  0 |
+ *    \  0 10 11 12 /
+ */
+
+    const IndexType m_rows_3 = 4;
+    const IndexType m_cols_3 = 4;
+
+    Matrix m_3;
+    m_3.reset();
+    m_3.setDimensions( m_rows_3, m_cols_3 );
+    typename Matrix::CompressedRowLengthsVector rowLengths_3;
+    rowLengths_3.setSize( m_rows_3 );
+    rowLengths_3.setValue( 3 );
+    m_3.setCompressedRowLengths( rowLengths_3 );
+
+    for( IndexType i = 0; i < 3; i++ )          // 0th row
+        m_3.setElement( 0, i, 1 );
+
+    for( IndexType i = 1; i < 4; i++ )
+        m_3.setElement( 1, i, 1 );      // 1st row
+
+    for( IndexType i = 0; i < 3; i++ )          // 2nd row
+        m_3.setElement( 2, i, 1 );
+
+    for( IndexType i = 1; i < 4; i++ )          // 3rd row
+        m_3.setElement( 3, i, 1 );
+
+    VectorType inVector_3;
+    inVector_3.setSize( m_cols_3 );
+    for( IndexType i = 0; i < inVector_3.getSize(); i++ )
+        inVector_3.setElement( i, 2 );
+
+    VectorType outVector_3;
+    outVector_3.setSize( m_rows_3 );
+    for( IndexType j = 0; j < outVector_3.getSize(); j++ )
+        outVector_3.setElement( j, 0 );
+
+
+    m_3.vectorProduct( inVector_3, outVector_3 );
+
+
+    EXPECT_EQ( outVector_3.getElement( 0 ), 1 );
+    EXPECT_EQ( outVector_3.getElement( 1 ), 1 );
+    EXPECT_EQ( outVector_3.getElement( 2 ), 1 );
+    EXPECT_EQ( outVector_3.getElement( 3 ), 1 );
+
+
+/*
+ * Sets up the following 8x8 sparse matrix:
+ *
+ *    /  1  2  3  0  0  4  0  0 \
+ *    |  0  5  6  7  8  0  0  0 |
+ *    |  9 10 11 12 13  0  0  0 |
+ *    |  0 14 15 16 17  0  0  0 |
+ *    |  0  0 18 19 20 21  0  0 |
+ *    |  0  0  0 22 23 24 25  0 |
+ *    | 26 27 28 29 30  0  0  0 |
+ *    \ 31 32 33 34 35  0  0  0 /
+ */
+
+    const IndexType m_rows_4 = 8;
+    const IndexType m_cols_4 = 8;
+
+    Matrix m_4;
+    m_4.reset();
+    m_4.setDimensions( m_rows_4, m_cols_4 );
+    typename Matrix::CompressedRowLengthsVector rowLengths_4;
+    rowLengths_4.setSize( m_rows_4 );
+    rowLengths_4.setValue( 4 );
+    rowLengths_4.setElement( 2, 5 );
+    rowLengths_4.setElement( 6, 5 );
+    rowLengths_4.setElement( 7, 5 );
+    m_4.setCompressedRowLengths( rowLengths_4 );
+
+    for( IndexType i = 0; i < 3; i++ )       // 0th row
+        m_4.setElement( 0, i, 1 );
+
+    m_4.setElement( 0, 5, 1 );
+
+    for( IndexType i = 1; i < 5; i++ )       // 1st row
+        m_4.setElement( 1, i, 1 );
+
+    for( IndexType i = 0; i < 5; i++ )       // 2nd row
+        m_4.setElement( 2, i, 1 );
+
+    for( IndexType i = 1; i < 5; i++ )       // 3rd row
+        m_4.setElement( 3, i, 1 );
+
+    for( IndexType i = 2; i < 6; i++ )       // 4th row
+        m_4.setElement( 4, i, 1 );
+
+    for( IndexType i = 3; i < 7; i++ )       // 5th row
+        m_4.setElement( 5, i, 1 );
+
+    for( IndexType i = 0; i < 5; i++ )       // 6th row
+        m_4.setElement( 6, i, 1 );
+
+    for( IndexType i = 0; i < 5; i++ )       // 7th row
+        m_4.setElement( 7, i, 1 );
+
+    VectorType inVector_4;
+    inVector_4.setSize( m_cols_4 );
+    for( IndexType i = 0; i < inVector_4.getSize(); i++ )
+        inVector_4.setElement( i, 2 );
+
+    VectorType outVector_4;
+    outVector_4.setSize( m_rows_4 );
+    for( IndexType j = 0; j < outVector_4.getSize(); j++ )
+        outVector_4.setElement( j, 0 );
+
+
+    m_4.vectorProduct( inVector_4, outVector_4 );
+
+
+    EXPECT_EQ( outVector_4.getElement( 0 ),  20 );
+    EXPECT_EQ( outVector_4.getElement( 1 ),  52 );
+    EXPECT_EQ( outVector_4.getElement( 2 ), 110 );
+    EXPECT_EQ( outVector_4.getElement( 3 ), 124 );
+    EXPECT_EQ( outVector_4.getElement( 4 ), 156 );
+    EXPECT_EQ( outVector_4.getElement( 5 ), 188 );
+    EXPECT_EQ( outVector_4.getElement( 6 ), 280 );
+    EXPECT_EQ( outVector_4.getElement( 7 ), 330 );
+
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  2  3  0  4  5  0  1 \   6
+    *    |  0  6  0  7  0  0  0  1 |   3
+    *    |  0  8  9  0 10  0  0  1 |   4
+    *    |  0 11 12 13 14  0  0  1 |   5
+    *    |  0 15  0  0  0  0  0  1 |   2
+    *    |  0 16 17 18 19 20 21  1 |   7
+    *    | 22 23 24 25 26 27 28  1 |   8
+    *    \ 29 30 31 32 33 34 35 36 /   8
+    */
+
+    const IndexType m_rows_5 = 8;
+    const IndexType m_cols_5 = 8;
+
+    Matrix m_5;
+    m_5.reset();
+    m_5.setDimensions( m_rows_5, m_cols_5 );
+    typename Matrix::CompressedRowLengthsVector rowLengths_5;
+    rowLengths_5.setSize( m_rows_5 );
+    rowLengths_5.setElement(0, 6);
+    rowLengths_5.setElement(1, 3);
+    rowLengths_5.setElement(2, 4);
+    rowLengths_5.setElement(3, 5);
+    rowLengths_5.setElement(4, 2);
+    rowLengths_5.setElement(5, 7);
+    rowLengths_5.setElement(6, 8);
+    rowLengths_5.setElement(7, 8);
+    m_5.setCompressedRowLengths( rowLengths_5 );
+
+    RealType value_5 = 1;
+    for( IndexType i = 0; i < 3; i++ )   // 0th row
+        m_5.setElement( 0, i, 1 );
+
+    m_5.setElement( 0, 4, 1 );           // 0th row
+    m_5.setElement( 0, 5, 1 );
+
+    m_5.setElement( 1, 1, 1 );           // 1st row
+    m_5.setElement( 1, 3, 1 );
+
+    for( IndexType i = 1; i < 3; i++ )            // 2nd row
+        m_5.setElement( 2, i, 1 );
+
+    m_5.setElement( 2, 4, 1 );           // 2nd row
+
+    for( IndexType i = 1; i < 5; i++ )            // 3rd row
+        m_5.setElement( 3, i, 1 );
+
+    m_5.setElement( 4, 1, 1 );           // 4th row
+
+    for( IndexType i = 1; i < 7; i++ )            // 5th row
+        m_5.setElement( 5, i, 1 );
+
+    for( IndexType i = 0; i < 7; i++ )            // 6th row
+        m_5.setElement( 6, i, 1 );
+
+    for( IndexType i = 0; i < 8; i++ )            // 7th row
+        m_5.setElement( 7, i, 1 );
+
+    for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
+        m_5.setElement( i, 7, 1);
+
+    VectorType inVector_5;
+    inVector_5.setSize( m_cols_5 );
+    for( IndexType i = 0; i < inVector_5.getSize(); i++ )
+        inVector_5.setElement( i, 2 );
+
+    VectorType outVector_5;
+    outVector_5.setSize( m_rows_5 );
+    for( IndexType j = 0; j < outVector_5.getSize(); j++ )
+        outVector_5.setElement( j, 0 );
+
+    m_5.vectorProduct( inVector_5, outVector_5 );
+
+    EXPECT_EQ( outVector_5.getElement( 0 ), 1 );
+    EXPECT_EQ( outVector_5.getElement( 1 ), 1 );
+    EXPECT_EQ( outVector_5.getElement( 2 ), 1 );
+    EXPECT_EQ( outVector_5.getElement( 3 ), 1 );
+    EXPECT_EQ( outVector_5.getElement( 4 ), 1 );
+    EXPECT_EQ( outVector_5.getElement( 5 ), 1 );
+    EXPECT_EQ( outVector_5.getElement( 6 ), 1 );
+    EXPECT_EQ( outVector_5.getElement( 7 ), 1 );
+}
+
+template< typename Matrix >
+void test_RowsReduction()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  2  3  0  4  5  0  1 \   6
+    *    |  0  6  0  7  0  0  0  1 |   3
+    *    |  0  8  9  0 10  0  0  1 |   4
+    *    |  0 11 12 13 14  0  0  1 |   5
+    *    |  0 15  0  0  0  0  0  1 |   2
+    *    |  0 16 17 18 19 20 21  1 |   7
+    *    | 22 23 24 25 26 27 28  1 |   8
+    *    \ 29 30 31 32 33 34 35 36 /   8
+    */
+
+   const IndexType rows = 8;
+   const IndexType cols = 8;
+
+   Matrix m;
+   m.setDimensions( rows, cols );
+   typename Matrix::RowsCapacitiesType rowsCapacities( rows );
+   //rowLengths.setSize( rows );
+   rowsCapacities.setElement(0, 6);
+   rowsCapacities.setElement(1, 3);
+   rowsCapacities.setElement(2, 4);
+   rowsCapacities.setElement(3, 5);
+   rowsCapacities.setElement(4, 2);
+   rowsCapacities.setElement(5, 7);
+   rowsCapacities.setElement(6, 8);
+   rowsCapacities.setElement(7, 8);
+   m.setCompressedRowLengths( rowsCapacities );
+
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m.setElement( 0, i, 1 );
+
+   m.setElement( 0, 4, 1 );           // 0th row
+   m.setElement( 0, 5, 1 );
+
+   m.setElement( 1, 1, 1 );           // 1st row
+   m.setElement( 1, 3, 1 );
+
+   for( IndexType i = 1; i < 3; i++ )            // 2nd row
+      m.setElement( 2, i, 1 );
+
+   m.setElement( 2, 4, 1 );           // 2nd row
+
+   for( IndexType i = 1; i < 5; i++ )            // 3rd row
+      m.setElement( 3, i, 1 );
+
+   m.setElement( 4, 1, 1 );           // 4th row
+
+   for( IndexType i = 1; i < 7; i++ )            // 5th row
+      m.setElement( 5, i, 1 );
+
+   for( IndexType i = 0; i < 7; i++ )            // 6th row
+      m.setElement( 6, i, 1 );
+
+   for( IndexType i = 0; i < 8; i++ )            // 7th row
+       m.setElement( 7, i, 1 );
+
+   for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
+      m.setElement( i, 7, 1);
+
+   ////
+   // Compute number of non-zero elements in rows.
+   typename Matrix::RowsCapacitiesType rowLengths( rows );
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   m.allRowsReduction( fetch, reduce, keep, 0 );
+   EXPECT_EQ( rowsCapacities, rowLengths );
+   m.getCompressedRowLengths( rowLengths );
+   EXPECT_EQ( rowsCapacities, rowLengths );
+
+   ////
+   // Compute max norm
+   TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows );
+   auto rowSums_view = rowSums.getView();
+   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+      return abs( value );
+   };
+   auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowSums_view[ rowIdx ] = value;
+   };
+   m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 );
+   const RealType maxNorm = TNL::max( rowSums );
+   EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36
+}
+
+template< typename Matrix >
+void test_PerformSORIteration()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+/*
+ * Sets up the following 4x4 sparse matrix:
+ *
+ *    /  4  1  0  0 \
+ *    |  1  4  1  0 |
+ *    |  0  1  4  1 |
+ *    \  0  0  1  4 /
+ */
+
+    const IndexType m_rows = 4;
+    const IndexType m_cols = 4;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( m_rows, m_cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( m_rows );
+    rowLengths.setValue( 3 );
+    m.setCompressedRowLengths( rowLengths );
+
+    m.setElement( 0, 0, 4.0 );        // 0th row
+    m.setElement( 0, 1, 1.0);
+
+    m.setElement( 1, 0, 1.0 );        // 1st row
+    m.setElement( 1, 1, 4.0 );
+    m.setElement( 1, 2, 1.0 );
+
+    m.setElement( 2, 1, 1.0 );        // 2nd row
+    m.setElement( 2, 2, 4.0 );
+    m.setElement( 2, 3, 1.0 );
+
+    m.setElement( 3, 2, 1.0 );        // 3rd row
+    m.setElement( 3, 3, 4.0 );
+
+    RealType bVector [ 4 ] = { 1, 1, 1, 1 };
+    RealType xVector [ 4 ] = { 1, 1, 1, 1 };
+
+    IndexType row = 0;
+    RealType omega = 1;
+
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], 0.0 );
+    EXPECT_EQ( xVector[ 1 ], 1.0 );
+    EXPECT_EQ( xVector[ 2 ], 1.0 );
+    EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], 0.0 );
+    EXPECT_EQ( xVector[ 1 ], 0.0 );
+    EXPECT_EQ( xVector[ 2 ], 1.0 );
+    EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], 0.0 );
+    EXPECT_EQ( xVector[ 1 ], 0.0 );
+    EXPECT_EQ( xVector[ 2 ], 0.0 );
+    EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], 0.0 );
+    EXPECT_EQ( xVector[ 1 ], 0.0 );
+    EXPECT_EQ( xVector[ 2 ], 0.0 );
+    EXPECT_EQ( xVector[ 3 ], 0.25 );
+}
+
+// This test is only for AdEllpack
+template< typename Matrix >
+void test_OperatorEquals()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   if( std::is_same< DeviceType, TNL::Devices::Cuda >::value )
+       return;
+   else
+   {
+       using AdELL_host = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Host, IndexType >;
+       using AdELL_cuda = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Cuda, IndexType >;
+
+       /*
+        * Sets up the following 8x8 sparse matrix:
+        *
+        *    /  1  2  3  0  4  5  0  1 \   6
+        *    |  0  6  0  7  0  0  0  1 |   3
+        *    |  0  8  9  0 10  0  0  1 |   4
+        *    |  0 11 12 13 14  0  0  1 |   5
+        *    |  0 15  0  0  0  0  0  1 |   2
+        *    |  0 16 17 18 19 20 21  1 |   7
+        *    | 22 23 24 25 26 27 28  1 |   8
+        *    \ 29 30 31 32 33 34 35 36 /   8
+        */
+
+        const IndexType m_rows = 8;
+        const IndexType m_cols = 8;
+
+        AdELL_host m_host;
+
+        m_host.reset();
+        m_host.setDimensions( m_rows, m_cols );
+        typename AdELL_host::CompressedRowLengthsVector rowLengths;
+        rowLengths.setSize( m_rows );
+        rowLengths.setElement(0, 6);
+        rowLengths.setElement(1, 3);
+        rowLengths.setElement(2, 4);
+        rowLengths.setElement(3, 5);
+        rowLengths.setElement(4, 2);
+        rowLengths.setElement(5, 7);
+        rowLengths.setElement(6, 8);
+        rowLengths.setElement(7, 8);
+        m_host.setCompressedRowLengths( rowLengths );
+
+        RealType value = 1;
+        for( IndexType i = 0; i < 3; i++ )   // 0th row
+            m_host.setElement( 0, i, value++ );
+
+        m_host.setElement( 0, 4, value++ );           // 0th row
+        m_host.setElement( 0, 5, value++ );
+
+        m_host.setElement( 1, 1, value++ );           // 1st row
+        m_host.setElement( 1, 3, value++ );
+
+        for( IndexType i = 1; i < 3; i++ )            // 2nd row
+            m_host.setElement( 2, i, value++ );
+
+        m_host.setElement( 2, 4, value++ );           // 2nd row
+
+
+        for( IndexType i = 1; i < 5; i++ )            // 3rd row
+            m_host.setElement( 3, i, value++ );
+
+        m_host.setElement( 4, 1, value++ );           // 4th row
+
+        for( IndexType i = 1; i < 7; i++ )            // 5th row
+            m_host.setElement( 5, i, value++ );
+
+        for( IndexType i = 0; i < 7; i++ )            // 6th row
+            m_host.setElement( 6, i, value++ );
+
+        for( IndexType i = 0; i < 8; i++ )            // 7th row
+            m_host.setElement( 7, i, value++ );
+
+        for( IndexType i = 0; i < 7; i++ )            // 1s at the end or rows: 5, 6
+            m_host.setElement( i, 7, 1);
+
+        EXPECT_EQ( m_host.getElement( 0, 0 ),  1 );
+        EXPECT_EQ( m_host.getElement( 0, 1 ),  2 );
+        EXPECT_EQ( m_host.getElement( 0, 2 ),  3 );
+        EXPECT_EQ( m_host.getElement( 0, 3 ),  0 );
+        EXPECT_EQ( m_host.getElement( 0, 4 ),  4 );
+        EXPECT_EQ( m_host.getElement( 0, 5 ),  5 );
+        EXPECT_EQ( m_host.getElement( 0, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 0, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 1, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 1 ),  6 );
+        EXPECT_EQ( m_host.getElement( 1, 2 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 3 ),  7 );
+        EXPECT_EQ( m_host.getElement( 1, 4 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 5 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 2, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 2, 1 ),  8 );
+        EXPECT_EQ( m_host.getElement( 2, 2 ),  9 );
+        EXPECT_EQ( m_host.getElement( 2, 3 ),  0 );
+        EXPECT_EQ( m_host.getElement( 2, 4 ), 10 );
+        EXPECT_EQ( m_host.getElement( 2, 5 ),  0 );
+        EXPECT_EQ( m_host.getElement( 2, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 2, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 3, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 3, 1 ), 11 );
+        EXPECT_EQ( m_host.getElement( 3, 2 ), 12 );
+        EXPECT_EQ( m_host.getElement( 3, 3 ), 13 );
+        EXPECT_EQ( m_host.getElement( 3, 4 ), 14 );
+        EXPECT_EQ( m_host.getElement( 3, 5 ),  0 );
+        EXPECT_EQ( m_host.getElement( 3, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 3, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 4, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 1 ), 15 );
+        EXPECT_EQ( m_host.getElement( 4, 2 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 3 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 4 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 5 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 5, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 5, 1 ), 16 );
+        EXPECT_EQ( m_host.getElement( 5, 2 ), 17 );
+        EXPECT_EQ( m_host.getElement( 5, 3 ), 18 );
+        EXPECT_EQ( m_host.getElement( 5, 4 ), 19 );
+        EXPECT_EQ( m_host.getElement( 5, 5 ), 20 );
+        EXPECT_EQ( m_host.getElement( 5, 6 ), 21 );
+        EXPECT_EQ( m_host.getElement( 5, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 6, 0 ), 22 );
+        EXPECT_EQ( m_host.getElement( 6, 1 ), 23 );
+        EXPECT_EQ( m_host.getElement( 6, 2 ), 24 );
+        EXPECT_EQ( m_host.getElement( 6, 3 ), 25 );
+        EXPECT_EQ( m_host.getElement( 6, 4 ), 26 );
+        EXPECT_EQ( m_host.getElement( 6, 5 ), 27 );
+        EXPECT_EQ( m_host.getElement( 6, 6 ), 28 );
+        EXPECT_EQ( m_host.getElement( 6, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 7, 0 ), 29 );
+        EXPECT_EQ( m_host.getElement( 7, 1 ), 30 );
+        EXPECT_EQ( m_host.getElement( 7, 2 ), 31 );
+        EXPECT_EQ( m_host.getElement( 7, 3 ), 32 );
+        EXPECT_EQ( m_host.getElement( 7, 4 ), 33 );
+        EXPECT_EQ( m_host.getElement( 7, 5 ), 34 );
+        EXPECT_EQ( m_host.getElement( 7, 6 ), 35 );
+        EXPECT_EQ( m_host.getElement( 7, 7 ), 36 );
+
+        AdELL_cuda m_cuda;
+
+        // Copy the host matrix into the cuda matrix
+        m_cuda = m_host;
+
+        // Reset the host matrix
+        m_host.reset();
+
+        // Copy the cuda matrix back into the host matrix
+        m_host = m_cuda;
+
+        // Check the newly created double-copy host matrix
+        EXPECT_EQ( m_host.getElement( 0, 0 ),  1 );
+        EXPECT_EQ( m_host.getElement( 0, 1 ),  2 );
+        EXPECT_EQ( m_host.getElement( 0, 2 ),  3 );
+        EXPECT_EQ( m_host.getElement( 0, 3 ),  0 );
+        EXPECT_EQ( m_host.getElement( 0, 4 ),  4 );
+        EXPECT_EQ( m_host.getElement( 0, 5 ),  5 );
+        EXPECT_EQ( m_host.getElement( 0, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 0, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 1, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 1 ),  6 );
+        EXPECT_EQ( m_host.getElement( 1, 2 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 3 ),  7 );
+        EXPECT_EQ( m_host.getElement( 1, 4 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 5 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 1, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 2, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 2, 1 ),  8 );
+        EXPECT_EQ( m_host.getElement( 2, 2 ),  9 );
+        EXPECT_EQ( m_host.getElement( 2, 3 ),  0 );
+        EXPECT_EQ( m_host.getElement( 2, 4 ), 10 );
+        EXPECT_EQ( m_host.getElement( 2, 5 ),  0 );
+        EXPECT_EQ( m_host.getElement( 2, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 2, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 3, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 3, 1 ), 11 );
+        EXPECT_EQ( m_host.getElement( 3, 2 ), 12 );
+        EXPECT_EQ( m_host.getElement( 3, 3 ), 13 );
+        EXPECT_EQ( m_host.getElement( 3, 4 ), 14 );
+        EXPECT_EQ( m_host.getElement( 3, 5 ),  0 );
+        EXPECT_EQ( m_host.getElement( 3, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 3, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 4, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 1 ), 15 );
+        EXPECT_EQ( m_host.getElement( 4, 2 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 3 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 4 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 5 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 6 ),  0 );
+        EXPECT_EQ( m_host.getElement( 4, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 5, 0 ),  0 );
+        EXPECT_EQ( m_host.getElement( 5, 1 ), 16 );
+        EXPECT_EQ( m_host.getElement( 5, 2 ), 17 );
+        EXPECT_EQ( m_host.getElement( 5, 3 ), 18 );
+        EXPECT_EQ( m_host.getElement( 5, 4 ), 19 );
+        EXPECT_EQ( m_host.getElement( 5, 5 ), 20 );
+        EXPECT_EQ( m_host.getElement( 5, 6 ), 21 );
+        EXPECT_EQ( m_host.getElement( 5, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 6, 0 ), 22 );
+        EXPECT_EQ( m_host.getElement( 6, 1 ), 23 );
+        EXPECT_EQ( m_host.getElement( 6, 2 ), 24 );
+        EXPECT_EQ( m_host.getElement( 6, 3 ), 25 );
+        EXPECT_EQ( m_host.getElement( 6, 4 ), 26 );
+        EXPECT_EQ( m_host.getElement( 6, 5 ), 27 );
+        EXPECT_EQ( m_host.getElement( 6, 6 ), 28 );
+        EXPECT_EQ( m_host.getElement( 6, 7 ),  1 );
+
+        EXPECT_EQ( m_host.getElement( 7, 0 ), 29 );
+        EXPECT_EQ( m_host.getElement( 7, 1 ), 30 );
+        EXPECT_EQ( m_host.getElement( 7, 2 ), 31 );
+        EXPECT_EQ( m_host.getElement( 7, 3 ), 32 );
+        EXPECT_EQ( m_host.getElement( 7, 4 ), 33 );
+        EXPECT_EQ( m_host.getElement( 7, 5 ), 34 );
+        EXPECT_EQ( m_host.getElement( 7, 6 ), 35 );
+        EXPECT_EQ( m_host.getElement( 7, 7 ), 36 );
+
+        // Try vectorProduct with copied cuda matrix to see if it works correctly.
+        using VectorType = TNL::Containers::Vector< RealType, TNL::Devices::Cuda, IndexType >;
+
+        VectorType inVector;
+        inVector.setSize( m_cols );
+        for( IndexType i = 0; i < inVector.getSize(); i++ )
+            inVector.setElement( i, 2 );
+
+        VectorType outVector;
+        outVector.setSize( m_rows );
+        for( IndexType j = 0; j < outVector.getSize(); j++ )
+            outVector.setElement( j, 0 );
+
+        m_cuda.vectorProduct( inVector, outVector );
+
+        EXPECT_EQ( outVector.getElement( 0 ),  32 );
+        EXPECT_EQ( outVector.getElement( 1 ),  28 );
+        EXPECT_EQ( outVector.getElement( 2 ),  56 );
+        EXPECT_EQ( outVector.getElement( 3 ), 102 );
+        EXPECT_EQ( outVector.getElement( 4 ),  32 );
+        EXPECT_EQ( outVector.getElement( 5 ), 224 );
+        EXPECT_EQ( outVector.getElement( 6 ), 352 );
+        EXPECT_EQ( outVector.getElement( 7 ), 520 );
+   }
+}
+
+template< typename Matrix >
+void test_SaveAndLoad( const char* filename )
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  0  4  0  5 |
+    *    |  6  7  8  0 |
+    *    \  0  9 10 11 /
+    */
+
+    const IndexType m_rows = 4;
+    const IndexType m_cols = 4;
+
+    Matrix savedMatrix;
+    savedMatrix.reset();
+    savedMatrix.setDimensions( m_rows, m_cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( m_rows );
+    rowLengths.setValue( 3 );
+    savedMatrix.setCompressedRowLengths( rowLengths );
+
+    for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
+        savedMatrix.setElement( 0, i, 1 );
+
+    savedMatrix.setElement( 1, 1, 1 );
+    savedMatrix.setElement( 1, 3, 1 );      // 1st row
+
+    for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
+        savedMatrix.setElement( 2, i, 1 );
+
+    for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
+        savedMatrix.setElement( 3, i, 1 );
+
+    ASSERT_NO_THROW( savedMatrix.save( filename ) );
+
+    Matrix loadedMatrix;
+    loadedMatrix.reset();
+    loadedMatrix.setDimensions( m_rows, m_cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths2;
+    rowLengths2.setSize( m_rows );
+    rowLengths2.setValue( 3 );
+    loadedMatrix.setCompressedRowLengths( rowLengths2 );
+
+
+    ASSERT_NO_THROW( loadedMatrix.load( filename ) );
+
+
+    EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
+    EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
+    EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
+    EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
+
+    EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
+    EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
+    EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
+    EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
+
+    EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
+    EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
+    EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
+    EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
+
+    EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
+    EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
+    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
+    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
+
+    EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
+    EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
+    EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
+    EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
+
+    EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  0 );
+    EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  4 );
+    EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  0 );
+    EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  5 );
+
+    EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  6 );
+    EXPECT_EQ( savedMatrix.getElement( 2, 1 ),  7 );
+    EXPECT_EQ( savedMatrix.getElement( 2, 2 ),  8 );
+    EXPECT_EQ( savedMatrix.getElement( 2, 3 ),  0 );
+
+    EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
+    EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  9 );
+    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 );
+    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 );
+
+    EXPECT_EQ( std::remove( filename ), 0 );
+}
+
+template< typename Matrix >
+void test_Print()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+/*
+ * Sets up the following 5x4 sparse matrix:
+ *
+ *    /  1  2  3  0 \
+ *    |  0  0  0  4 |
+ *    |  5  6  7  0 |
+ *    |  0  8  9 10 |
+ *    \  0  0 11 12 /
+ */
+
+    const IndexType m_rows = 5;
+    const IndexType m_cols = 4;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( m_rows, m_cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( m_rows );
+    rowLengths.setValue( 3 );
+    m.setCompressedRowLengths( rowLengths );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
+        m.setElement( 0, i, 1 );
+
+    m.setElement( 1, 3, 1 );      // 1st row
+
+    for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
+        m.setElement( 2, i, 1 );
+
+    for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
+        m.setElement( 3, i, 1 );
+
+    for( IndexType i = 2; i < m_cols; i++ )       // 4th row
+        m.setElement( 4, i, 1 );
+
+    #include <sstream>
+    std::stringstream printed;
+    std::stringstream couted;
+
+    //change the underlying buffer and save the old buffer
+    auto old_buf = std::cout.rdbuf(printed.rdbuf());
+
+    m.print( std::cout ); //all the std::cout goes to ss
+
+    std::cout.rdbuf(old_buf); //reset
+
+    couted << "Row: 0 ->  Col:0->1	 Col:1->1	 Col:2->1\t\n"
+               "Row: 1 ->  Col:3->1\t\n"
+               "Row: 2 ->  Col:0->1	 Col:1->1	 Col:2->1\t\n"
+               "Row: 3 ->  Col:1->1	 Col:2->1	 Col:3->1\t\n"
+               "Row: 4 ->  Col:2->1	 Col:3->1\t\n";
+
+
+    EXPECT_EQ( printed.str(), couted.str() );
+}
+
+#endif
diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index b19c8b705..4b12e81a3 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -28,6 +28,9 @@ IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack_segments SparseMatrixTest_SlicedEllpack_segments.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack_segments ${GTEST_BOTH_LIBRARIES} )
 
+   CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest BinarySparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BinarySparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
+
 ELSE(  BUILD_CUDA )
    ADD_EXECUTABLE( SparseMatrixTest SparseMatrixTest.cpp )
    TARGET_COMPILE_OPTIONS( SparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
@@ -65,6 +68,10 @@ ELSE(  BUILD_CUDA )
    TARGET_COMPILE_OPTIONS( SparseMatrixTest_SlicedEllpack_segments PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack_segments ${GTEST_BOTH_LIBRARIES} )
 
+   ADD_EXECUTABLE( BinarySparseMatrixTest BinarySparseMatrixTest.cpp )
+   TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BinarySparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
+
 ENDIF( BUILD_CUDA )
 
 ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
@@ -76,6 +83,7 @@ ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixT
 ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest_Ellpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest_SlicedEllpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack_segments${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( BinarySparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 
 if( ${BUILD_MPI} )
    if( BUILD_CUDA )
-- 
GitLab


From c7940ae7464d2e6dc128d3661e253daea982a133 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 31 Jan 2020 16:46:00 +0100
Subject: [PATCH 121/179] Debugging binary sparse matrix.

---
 src/TNL/Matrices/Multidiagonal.h | 4 ++--
 src/TNL/Matrices/Tridiagonal.h   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h
index c93dc7d9c..3b92d1db1 100644
--- a/src/TNL/Matrices/Multidiagonal.h
+++ b/src/TNL/Matrices/Multidiagonal.h
@@ -35,8 +35,8 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator >
       using RealAllocatorType = RealAllocator;
       using IndexAllocatorType = IndexAllocator;
       using BaseType = Matrix< Real, Device, Index, RealAllocator >;
-      using ValuesHolderType = typename BaseType::ValuesHolderType;
-      using ValuesViewType = typename ValuesHolderType::ViewType;
+      using ValuesVectorType = typename BaseType::ValuesVectorType;
+      using ValuesViewType = typename ValuesVectorType::ViewType;
       using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, RowMajorOrder >;
       using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
       using DiagonalsShiftsView = typename DiagonalsShiftsType::ViewType;
diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h
index b65cfb527..029793681 100644
--- a/src/TNL/Matrices/Tridiagonal.h
+++ b/src/TNL/Matrices/Tridiagonal.h
@@ -34,8 +34,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
       using RealAllocatorType = RealAllocator;
       using BaseType = Matrix< Real, Device, Index, RealAllocator >;
       using IndexerType = details::TridiagonalMatrixIndexer< IndexType, RowMajorOrder >;
-      using ValuesHolderType = typename BaseType::ValuesHolderType;
-      using ValuesViewType = typename ValuesHolderType::ViewType;
+      using ValuesVectorType = typename BaseType::ValuesVectorType;
+      using ValuesViewType = typename ValuesVectorType::ViewType;
       using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >;
       using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
       using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >;
-- 
GitLab


From c63e73869e5cab71023ef378814a27f8ec869dfb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 3 Feb 2020 17:13:32 +0100
Subject: [PATCH 122/179] Fixed asserts in Array/ArrayView::operator[].

---
 src/TNL/Containers/Array.hpp     | 8 ++++----
 src/TNL/Containers/ArrayView.hpp | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp
index 2a60986f5..1b8d6291f 100644
--- a/src/TNL/Containers/Array.hpp
+++ b/src/TNL/Containers/Array.hpp
@@ -510,9 +510,9 @@ Array< Value, Device, Index, Allocator >::
 operator[]( const Index& i )
 {
 #ifdef __CUDA_ARCH__
-   TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+   TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
 #else
-   TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
+   TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
                "Attempt to access data not allocated on the host from the host." );
 #endif
    TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." );
@@ -530,9 +530,9 @@ Array< Value, Device, Index, Allocator >::
 operator[]( const Index& i ) const
 {
 #ifdef __CUDA_ARCH__
-   TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+   TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
 #else
-   TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
+   TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
                "Attempt to access data not allocated on the host from the host." );
 #endif
    TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." );
diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp
index 81e143ac2..3373fc6ab 100644
--- a/src/TNL/Containers/ArrayView.hpp
+++ b/src/TNL/Containers/ArrayView.hpp
@@ -253,9 +253,9 @@ Value& ArrayView< Value, Device, Index >::
 operator[]( Index i )
 {
 #ifdef __CUDA_ARCH__
-   TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+   TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
 #else
-   TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
+   TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
                "Attempt to access data not allocated on the host from the host." );
 #endif
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
@@ -272,9 +272,9 @@ Value& ArrayView< Value, Device, Index >::
 operator[]( Index i ) const
 {
 #ifdef __CUDA_ARCH__
-   TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+   TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
 #else
-   TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
+   TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
                "Attempt to access data not allocated on the host from the host." );
 #endif
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
-- 
GitLab


From cfe193b9ddfd087ebc0c601faabf53a37a788294 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 3 Feb 2020 21:32:40 +0100
Subject: [PATCH 123/179] Reverting fixes of asserts in
 Array/ArrayView::operator[] since it is not accepted by nvcc.

---
 src/TNL/Containers/Array.hpp     | 8 ++++----
 src/TNL/Containers/ArrayView.hpp | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp
index 1b8d6291f..2a60986f5 100644
--- a/src/TNL/Containers/Array.hpp
+++ b/src/TNL/Containers/Array.hpp
@@ -510,9 +510,9 @@ Array< Value, Device, Index, Allocator >::
 operator[]( const Index& i )
 {
 #ifdef __CUDA_ARCH__
-   TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+   TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
 #else
-   TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
+   TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
                "Attempt to access data not allocated on the host from the host." );
 #endif
    TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." );
@@ -530,9 +530,9 @@ Array< Value, Device, Index, Allocator >::
 operator[]( const Index& i ) const
 {
 #ifdef __CUDA_ARCH__
-   TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+   TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
 #else
-   TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
+   TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
                "Attempt to access data not allocated on the host from the host." );
 #endif
    TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." );
diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp
index 3373fc6ab..81e143ac2 100644
--- a/src/TNL/Containers/ArrayView.hpp
+++ b/src/TNL/Containers/ArrayView.hpp
@@ -253,9 +253,9 @@ Value& ArrayView< Value, Device, Index >::
 operator[]( Index i )
 {
 #ifdef __CUDA_ARCH__
-   TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+   TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
 #else
-   TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
+   TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
                "Attempt to access data not allocated on the host from the host." );
 #endif
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
@@ -272,9 +272,9 @@ Value& ArrayView< Value, Device, Index >::
 operator[]( Index i ) const
 {
 #ifdef __CUDA_ARCH__
-   TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+   TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
 #else
-   TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
+   TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
                "Attempt to access data not allocated on the host from the host." );
 #endif
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
-- 
GitLab


From 9c3bbace343101a6594011d947568800ab928af5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 3 Feb 2020 21:33:17 +0100
Subject: [PATCH 124/179] Debugging binary sparse matrix.

---
 src/TNL/Matrices/MatrixType.h                 |   36 +
 src/TNL/Matrices/SparseMatrix.h               |    2 +-
 src/TNL/Matrices/SparseMatrixRowView.hpp      |    2 +
 src/TNL/Matrices/SparseMatrixView.h           |    2 +-
 src/TNL/Matrices/SparseMatrixView.hpp         |   42 +-
 .../Matrices/BinarySparseMatrixTest.h         |   30 +-
 .../Matrices/BinarySparseMatrixTest.hpp       | 1447 ++++++++---------
 src/UnitTests/Matrices/SparseMatrixTest.hpp   |   21 -
 8 files changed, 790 insertions(+), 792 deletions(-)

diff --git a/src/TNL/Matrices/MatrixType.h b/src/TNL/Matrices/MatrixType.h
index c5c8f6375..8d4cfe7ba 100644
--- a/src/TNL/Matrices/MatrixType.h
+++ b/src/TNL/Matrices/MatrixType.h
@@ -13,15 +13,51 @@
 namespace TNL {
    namespace Matrices {
 
+template< bool Symmetric,
+          bool Binary >
+struct MatrixType
+{
+   static constexpr bool isSymmetric() { return Symmetric; }
+
+   static constexpr bool isBinary() { return Binary; }
+
+};
+
 struct GeneralMatrix
 {
    static constexpr bool isSymmetric() { return false; }
+
+   static constexpr bool isBinary() { return false; }
 };
 
 struct SymmetricMatrix
 {
    static constexpr bool isSymmetric() { return true; }
+
+   static constexpr bool isBinary() { return false; }
+};
+
+struct BinaryMatrix
+{
+   static constexpr bool isSymmetric() { return false; }
+
+   static constexpr bool isBinary() { return true; }
 };
 
+struct BinarySymmetricMatrix
+{
+   static constexpr bool isSymmetric() { return false; }
+
+   static constexpr bool isBinary() { return true; }
+};
+
+struct SymmetricBinaryMatrix
+{
+   static constexpr bool isSymmetric() { return false; }
+
+   static constexpr bool isBinary() { return true; }
+};
+
+
    } //namespace Matrices
 } //namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 49e3b45bb..7072ce3c4 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -32,7 +32,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 {
    public:
       static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
-      static constexpr bool isBinary() { return std::is_same< Real, bool >::value; };
+      static constexpr bool isBinary() { return MatrixType::isBinary(); };
 
       using RealType = Real;
       template< typename Device_, typename Index_, typename IndexAllocator_ >
diff --git a/src/TNL/Matrices/SparseMatrixRowView.hpp b/src/TNL/Matrices/SparseMatrixRowView.hpp
index ab5b4622b..67d0845d4 100644
--- a/src/TNL/Matrices/SparseMatrixRowView.hpp
+++ b/src/TNL/Matrices/SparseMatrixRowView.hpp
@@ -72,6 +72,7 @@ SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
 getValue( const IndexType localIdx ) const -> const RealType&
 {
    TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
+   TNL_ASSERT_FALSE( isBinary(), "Cannot call this method for binary matrix row." );
    return values[ segmentView.getGlobalIndex( localIdx ) ];
 }
 
@@ -84,6 +85,7 @@ SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
 getValue( const IndexType localIdx ) -> RealType&
 {
    TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
+   TNL_ASSERT_FALSE( isBinary(), "Cannot call this method for binary matrix row." );
    return values[ segmentView.getGlobalIndex( localIdx ) ];
 }
 
diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h
index d8c6eb63f..2756c80d7 100644
--- a/src/TNL/Matrices/SparseMatrixView.h
+++ b/src/TNL/Matrices/SparseMatrixView.h
@@ -28,7 +28,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
 {
    public:
       static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
-      static constexpr bool isBinary() { return std::is_same< Real, bool >::value; };
+      static constexpr bool isBinary() { return MatrixType::isBinary(); };
 
       using RealType = Real;
       template< typename Device_, typename Index_ >
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index 4ac0a29b8..5b043753f 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -227,7 +227,8 @@ addElement( const IndexType row,
       col = this->columnIndexes.getElement( globalIdx );
       if( col == column )
       {
-         this->values.setElement( globalIdx, thisElementMultiplicator * this->values.getElement( globalIdx ) + value );
+         if( ! isBinary() )
+            this->values.setElement( globalIdx, thisElementMultiplicator * this->values.getElement( globalIdx ) + value );
          return;
       }
       if( col == this->getPaddingIndex() || col > column )
@@ -242,7 +243,8 @@ addElement( const IndexType row,
    if( col == this->getPaddingIndex() )
    {
       this->columnIndexes.setElement( globalIdx, column );
-      this->values.setElement( globalIdx, value );
+      if( ! isBinary() )
+         this->values.setElement( globalIdx, value );
       return;
    }
    else
@@ -255,7 +257,8 @@ addElement( const IndexType row,
          TNL_ASSERT_LT( globalIdx1, this->columnIndexes.getSize(), "" );
          TNL_ASSERT_LT( globalIdx2, this->columnIndexes.getSize(), "" );
          this->columnIndexes.setElement( globalIdx1, this->columnIndexes.getElement( globalIdx2 ) );
-         this->values.setElement( globalIdx1, this->values.getElement( globalIdx2 ) );
+         if( ! isBinary() )
+            this->values.setElement( globalIdx1, this->values.getElement( globalIdx2 ) );
          j--;
       }
 
@@ -287,7 +290,12 @@ getElement( const IndexType row,
       TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" );
       const IndexType col = this->columnIndexes.getElement( globalIdx );
       if( col == column )
-         return this->values.getElement( globalIdx );
+      {
+         if( isBinary() )
+            return 1;
+         else
+            return this->values.getElement( globalIdx );
+      }
    }
    return 0.0;
 }
@@ -334,6 +342,8 @@ vectorProduct( const InVector& inVector,
       compute = ( column != paddingIndex );
       if( ! compute )
          return 0.0;
+      if( isBinary() )
+         return inVectorView[ column ];
       return valuesView[ globalIdx ] * inVectorView[ column ];
    };
    auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
@@ -382,7 +392,12 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
    auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) ) {
       IndexType columnIdx = columns_view[ globalIdx ];
       if( columnIdx != paddingIndex_ )
-         return fetch( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
+      {
+         if( isBinary() )
+            return fetch( rowIdx, columnIdx, globalIdx, 1 );
+         else
+            return fetch( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
+      }
       return zero;
    };
    this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );
@@ -415,7 +430,10 @@ forRows( IndexType first, IndexType last, Function& function ) const
    const auto values_view = this->values.getConstView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> bool {
-      function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute );
+      if( isBinary() )
+         function( rowIdx, localIdx, columns_view[ globalIdx ], 1, compute );
+      else
+         function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute );
       return true;
    };
    this->segments.forSegments( first, last, f );
@@ -435,7 +453,10 @@ forRows( IndexType first, IndexType last, Function& function )
    auto values_view = this->values.getView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable {
-      function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute );
+      if( isBinary() )
+         function( rowIdx, localIdx, columns_view[ globalIdx ], 1, compute );
+      else
+         function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute );
    };
    this->segments.forSegments( first, last, f );
 }
@@ -573,7 +594,12 @@ print( std::ostream& str ) const
          const IndexType column = this->columnIndexes.getElement( globalIdx );
          if( column == this->getPaddingIndex() )
             break;
-         str << " Col:" << column << "->" << this->values.getElement( globalIdx ) << "\t";
+         RealType value;
+         if( isBinary() )
+            value = 1.0;
+         else
+            value = this->values.getElement( globalIdx );
+         str << " Col:" << column << "->" << value << "\t";
       }
       str << std::endl;
    }
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.h b/src/UnitTests/Matrices/BinarySparseMatrixTest.h
index 0abba5b86..cb0d0bab5 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest.h
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.h
@@ -29,13 +29,31 @@ protected:
 // types for which MatrixTest is instantiated
 using CSRMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >
 #endif
 >;
 
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
index 5e969e976..3d1775972 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
@@ -153,16 +153,16 @@ void test_GetNumberOfNonzeroMatrixElements()
    /*
     * Sets up the following 10x10 sparse matrix:
     *
-    *    /  1  0  2  0  3  0  4  0  0  0  \
-    *    |  5  6  7  0  0  0  0  0  0  0  |
-    *    |  8  9 10 11 12 13 14 15  0  0  |
-    *    | 16 17  0  0  0  0  0  0  0  0  |
-    *    | 18  0  0  0  0  0  0  0  0  0  |
-    *    | 19  0  0  0  0  0  0  0  0  0  |
-    *    | 20  0  0  0  0  0  0  0  0  0  |
-    *    | 21  0  0  0  0  0  0  0  0  0  |
-    *    | 22 23 24 25 26 27 28 29 30 31  |
-    *    \ 32 33 34 35 36 37 38 39 40 41 /
+    *    /  1  0  1  0  1  0  1  0  0  0  \
+    *    |  1  1  1  0  0  0  0  0  0  0  |
+    *    |  1  1  1  1  1  1  1  1  0  0  |
+    *    |  1  1  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  1  1  1  1  1  1  1  1  1  |
+    *    \  1  1  1  1  1  1  1  1  1  1 /
     */
 
    const IndexType rows = 10;
@@ -175,38 +175,37 @@ void test_GetNumberOfNonzeroMatrixElements()
 
    typename Matrix::CompressedRowLengthsVector rowLengths;
    rowLengths.setSize( rows );
-   rowLengths.setElement( 0, 4 );
-   rowLengths.setElement( 1, 3 );
-   rowLengths.setElement( 2, 8 );
-   rowLengths.setElement( 3, 2 );
+   rowLengths.setElement( 0, 1 );
+   rowLengths.setElement( 1, 1 );
+   rowLengths.setElement( 2, 1 );
+   rowLengths.setElement( 3, 1 );
    for( IndexType i = 4; i < rows - 2; i++ )
    {
       rowLengths.setElement( i, 1 );
    }
-   rowLengths.setElement( 8, 10 );
-   rowLengths.setElement( 9, 10 );
+   rowLengths.setElement( 8, 1 );
+   rowLengths.setElement( 9, 1 );
    m.setCompressedRowLengths( rowLengths );
 
-   RealType value = 1;
    for( IndexType i = 0; i < 4; i++ )
-      m.setElement( 0, 2 * i, value++ );
+      m.setElement( 0, 2 * i, 1 );
 
    for( IndexType i = 0; i < 3; i++ )
-      m.setElement( 1, i, value++ );
+      m.setElement( 1, i, 1 );
 
    for( IndexType i = 0; i < 8; i++ )
-      m.setElement( 2, i, value++ );
+      m.setElement( 2, i, 1 );
 
    for( IndexType i = 0; i < 2; i++ )
-      m.setElement( 3, i, value++ );
+      m.setElement( 3, i, 1 );
 
    for( IndexType i = 4; i < 8; i++ )
-      m.setElement( i, 0, value++ );
+      m.setElement( i, 0, 1 );
 
    for( IndexType j = 8; j < rows; j++)
    {
       for( IndexType i = 0; i < cols; i++ )
-         m.setElement( j, i, value++ );
+         m.setElement( j, i, 1 );
    }
 
    EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 );
@@ -252,16 +251,16 @@ void test_GetRow()
 /*
  * Sets up the following 10x10 sparse matrix:
  *
- *    /  1  0  2  0  3  0  4  0  0  0  \
- *    |  5  6  7  0  0  0  0  0  0  0  |
- *    |  8  9 10 11 12 13 14 15  0  0  |
- *    | 16 17  0  0  0  0  0  0  0  0  |
- *    | 18  0  0  0  0  0  0  0  0  0  |
- *    | 19  0  0  0  0  0  0  0  0  0  |
- *    | 20  0  0  0  0  0  0  0  0  0  |
- *    | 21  0  0  0  0  0  0  0  0  0  |
- *    | 22 23 24 25 26 27 28 29 30 31  |
- *    \ 32 33 34 35 36 37 38 39 40 41 /
+ *    /  1  0  1  0  1  0  1  0  0  0  \
+ *    |  1  1  1  0  0  0  0  0  0  0  |
+ *    |  1  1  1  1  1  1  1  1  0  0  |
+ *    |  1  1  0  0  0  0  0  0  0  0  |
+ *    |  1  0  0  0  0  0  0  0  0  0  |
+ *    |  1  0  0  0  0  0  0  0  0  0  |
+ *    |  1  0  0  0  0  0  0  0  0  0  |
+ *    |  1  0  0  0  0  0  0  0  0  0  |
+ *    |  1  1  1  1  1  1  1  1  1  1  |
+ *    \  1  1  1  1  1  1  1  1  1  1 /
  */
 
     const IndexType rows = 10;
@@ -283,27 +282,6 @@ void test_GetRow()
     rowLengths.setElement( 9, 10 );
     m.setCompressedRowLengths( rowLengths );
 
-    /*RealType value = 1;
-    for( IndexType i = 0; i < 4; i++ )
-        m.setElement( 0, 2 * i, value++ );
-
-    for( IndexType i = 0; i < 3; i++ )
-        m.setElement( 1, i, value++ );
-
-    for( IndexType i = 0; i < 8; i++ )
-        m.setElement( 2, i, value++ );
-
-    for( IndexType i = 0; i < 2; i++ )
-        m.setElement( 3, i, value++ );
-
-    for( IndexType i = 4; i < 8; i++ )
-        m.setElement( i, 0, value++ );
-
-    for( IndexType j = 8; j < rows; j++)
-    {
-        for( IndexType i = 0; i < cols; i++ )
-            m.setElement( j, i, value++ );
-    }*/
     auto matrixView = m.getView();
     auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
        auto row = matrixView.getRow( rowIdx );
@@ -349,115 +327,115 @@ void test_GetRow()
     };
     TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f );
 
-    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
-    EXPECT_EQ( m.getElement( 0, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 2 ),  2 );
-    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 4 ),  3 );
-    EXPECT_EQ( m.getElement( 0, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 6 ),  4 );
-    EXPECT_EQ( m.getElement( 0, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ),  5 );
-    EXPECT_EQ( m.getElement( 1, 1 ),  6 );
-    EXPECT_EQ( m.getElement( 1, 2 ),  7 );
-    EXPECT_EQ( m.getElement( 1, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 6 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ),  8 );
-    EXPECT_EQ( m.getElement( 2, 1 ),  9 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 10 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 11 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 12 );
-    EXPECT_EQ( m.getElement( 2, 5 ), 13 );
-    EXPECT_EQ( m.getElement( 2, 6 ), 14 );
-    EXPECT_EQ( m.getElement( 2, 7 ), 15 );
-    EXPECT_EQ( m.getElement( 2, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 2, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ), 16 );
-    EXPECT_EQ( m.getElement( 3, 1 ), 17 );
-    EXPECT_EQ( m.getElement( 3, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 6 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ), 18 );
-    EXPECT_EQ( m.getElement( 4, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 6 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 5, 0 ), 19 );
-    EXPECT_EQ( m.getElement( 5, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 6 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 6, 0 ), 20 );
-    EXPECT_EQ( m.getElement( 6, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 6 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 7, 0 ), 21 );
-    EXPECT_EQ( m.getElement( 7, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 6 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 8, 0 ), 22 );
-    EXPECT_EQ( m.getElement( 8, 1 ), 23 );
-    EXPECT_EQ( m.getElement( 8, 2 ), 24 );
-    EXPECT_EQ( m.getElement( 8, 3 ), 25 );
-    EXPECT_EQ( m.getElement( 8, 4 ), 26 );
-    EXPECT_EQ( m.getElement( 8, 5 ), 27 );
-    EXPECT_EQ( m.getElement( 8, 6 ), 28 );
-    EXPECT_EQ( m.getElement( 8, 7 ), 29 );
-    EXPECT_EQ( m.getElement( 8, 8 ), 30 );
-    EXPECT_EQ( m.getElement( 8, 9 ), 31 );
-
-    EXPECT_EQ( m.getElement( 9, 0 ), 32 );
-    EXPECT_EQ( m.getElement( 9, 1 ), 33 );
-    EXPECT_EQ( m.getElement( 9, 2 ), 34 );
-    EXPECT_EQ( m.getElement( 9, 3 ), 35 );
-    EXPECT_EQ( m.getElement( 9, 4 ), 36 );
-    EXPECT_EQ( m.getElement( 9, 5 ), 37 );
-    EXPECT_EQ( m.getElement( 9, 6 ), 38 );
-    EXPECT_EQ( m.getElement( 9, 7 ), 39 );
-    EXPECT_EQ( m.getElement( 9, 8 ), 40 );
-    EXPECT_EQ( m.getElement( 9, 9 ), 41 );
+    EXPECT_EQ( m.getElement( 0, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 0, 1 ), 0 );
+    EXPECT_EQ( m.getElement( 0, 2 ), 1 );
+    EXPECT_EQ( m.getElement( 0, 3 ), 0 );
+    EXPECT_EQ( m.getElement( 0, 4 ), 1 );
+    EXPECT_EQ( m.getElement( 0, 5 ), 0 );
+    EXPECT_EQ( m.getElement( 0, 6 ), 1 );
+    EXPECT_EQ( m.getElement( 0, 7 ), 0 );
+    EXPECT_EQ( m.getElement( 0, 8 ), 0 );
+    EXPECT_EQ( m.getElement( 0, 9 ), 0 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 1, 1 ), 1 );
+    EXPECT_EQ( m.getElement( 1, 2 ), 1 );
+    EXPECT_EQ( m.getElement( 1, 3 ), 0 );
+    EXPECT_EQ( m.getElement( 1, 4 ), 0 );
+    EXPECT_EQ( m.getElement( 1, 5 ), 0 );
+    EXPECT_EQ( m.getElement( 1, 6 ), 0 );
+    EXPECT_EQ( m.getElement( 1, 7 ), 0 );
+    EXPECT_EQ( m.getElement( 1, 8 ), 0 );
+    EXPECT_EQ( m.getElement( 1, 9 ), 0 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 2, 1 ), 1 );
+    EXPECT_EQ( m.getElement( 2, 2 ), 1 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 1 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 1 );
+    EXPECT_EQ( m.getElement( 2, 5 ), 1 );
+    EXPECT_EQ( m.getElement( 2, 6 ), 1 );
+    EXPECT_EQ( m.getElement( 2, 7 ), 1 );
+    EXPECT_EQ( m.getElement( 2, 8 ), 0 );
+    EXPECT_EQ( m.getElement( 2, 9 ), 0 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 3, 1 ), 1 );
+    EXPECT_EQ( m.getElement( 3, 2 ), 0 );
+    EXPECT_EQ( m.getElement( 3, 3 ), 0 );
+    EXPECT_EQ( m.getElement( 3, 4 ), 0 );
+    EXPECT_EQ( m.getElement( 3, 5 ), 0 );
+    EXPECT_EQ( m.getElement( 3, 6 ), 0 );
+    EXPECT_EQ( m.getElement( 3, 7 ), 0 );
+    EXPECT_EQ( m.getElement( 3, 8 ), 0 );
+    EXPECT_EQ( m.getElement( 3, 9 ), 0 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 4, 1 ), 0 );
+    EXPECT_EQ( m.getElement( 4, 2 ), 0 );
+    EXPECT_EQ( m.getElement( 4, 3 ), 0 );
+    EXPECT_EQ( m.getElement( 4, 4 ), 0 );
+    EXPECT_EQ( m.getElement( 4, 5 ), 0 );
+    EXPECT_EQ( m.getElement( 4, 6 ), 0 );
+    EXPECT_EQ( m.getElement( 4, 7 ), 0 );
+    EXPECT_EQ( m.getElement( 4, 8 ), 0 );
+    EXPECT_EQ( m.getElement( 4, 9 ), 0 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 5, 1 ), 0 );
+    EXPECT_EQ( m.getElement( 5, 2 ), 0 );
+    EXPECT_EQ( m.getElement( 5, 3 ), 0 );
+    EXPECT_EQ( m.getElement( 5, 4 ), 0 );
+    EXPECT_EQ( m.getElement( 5, 5 ), 0 );
+    EXPECT_EQ( m.getElement( 5, 6 ), 0 );
+    EXPECT_EQ( m.getElement( 5, 7 ), 0 );
+    EXPECT_EQ( m.getElement( 5, 8 ), 0 );
+    EXPECT_EQ( m.getElement( 5, 9 ), 0 );
+
+    EXPECT_EQ( m.getElement( 6, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 6, 1 ), 0 );
+    EXPECT_EQ( m.getElement( 6, 2 ), 0 );
+    EXPECT_EQ( m.getElement( 6, 3 ), 0 );
+    EXPECT_EQ( m.getElement( 6, 4 ), 0 );
+    EXPECT_EQ( m.getElement( 6, 5 ), 0 );
+    EXPECT_EQ( m.getElement( 6, 6 ), 0 );
+    EXPECT_EQ( m.getElement( 6, 7 ), 0 );
+    EXPECT_EQ( m.getElement( 6, 8 ), 0 );
+    EXPECT_EQ( m.getElement( 6, 9 ), 0 );
+
+    EXPECT_EQ( m.getElement( 7, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 7, 1 ), 0 );
+    EXPECT_EQ( m.getElement( 7, 2 ), 0 );
+    EXPECT_EQ( m.getElement( 7, 3 ), 0 );
+    EXPECT_EQ( m.getElement( 7, 4 ), 0 );
+    EXPECT_EQ( m.getElement( 7, 5 ), 0 );
+    EXPECT_EQ( m.getElement( 7, 6 ), 0 );
+    EXPECT_EQ( m.getElement( 7, 7 ), 0 );
+    EXPECT_EQ( m.getElement( 7, 8 ), 0 );
+    EXPECT_EQ( m.getElement( 7, 9 ), 0 );
+
+    EXPECT_EQ( m.getElement( 8, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 8, 1 ), 1 );
+    EXPECT_EQ( m.getElement( 8, 2 ), 1 );
+    EXPECT_EQ( m.getElement( 8, 3 ), 1 );
+    EXPECT_EQ( m.getElement( 8, 4 ), 1 );
+    EXPECT_EQ( m.getElement( 8, 5 ), 1 );
+    EXPECT_EQ( m.getElement( 8, 6 ), 1 );
+    EXPECT_EQ( m.getElement( 8, 7 ), 1 );
+    EXPECT_EQ( m.getElement( 8, 8 ), 1 );
+    EXPECT_EQ( m.getElement( 8, 9 ), 1 );
+
+    EXPECT_EQ( m.getElement( 9, 0 ), 1 );
+    EXPECT_EQ( m.getElement( 9, 1 ), 1 );
+    EXPECT_EQ( m.getElement( 9, 2 ), 1 );
+    EXPECT_EQ( m.getElement( 9, 3 ), 1 );
+    EXPECT_EQ( m.getElement( 9, 4 ), 1 );
+    EXPECT_EQ( m.getElement( 9, 5 ), 1 );
+    EXPECT_EQ( m.getElement( 9, 6 ), 1 );
+    EXPECT_EQ( m.getElement( 9, 7 ), 1 );
+    EXPECT_EQ( m.getElement( 9, 8 ), 1 );
+    EXPECT_EQ( m.getElement( 9, 9 ), 1 );
 }
 
 
@@ -471,16 +449,16 @@ void test_SetElement()
 /*
  * Sets up the following 10x10 sparse matrix:
  *
- *    /  1  0  2  0  3  0  4  0  0  0  \
- *    |  5  6  7  0  0  0  0  0  0  0  |
- *    |  8  9 10 11 12 13 14 15  0  0  |
- *    | 16 17  0  0  0  0  0  0  0  0  |
- *    | 18  0  0  0  0  0  0  0  0  0  |
- *    | 19  0  0  0  0  0  0  0  0  0  |
- *    | 20  0  0  0  0  0  0  0  0  0  |
- *    | 21  0  0  0  0  0  0  0  0  0  |
- *    | 22 23 24 25 26 27 28 29 30 31  |
- *    \ 32 33 34 35 36 37 38 39 40 41 /
+ *    /  1  0  1  0  1  0  1  0  0  0  \
+ *    |  1  1  1  0  0  0  0  0  0  0  |
+ *    |  1  1  1  1  1  1  1  1  0  0  |
+ *    |  1  1  0  0  0  0  0  0  0  0  |
+ *    |  1  0  0  0  0  0  0  0  0  0  |
+ *    |  1  0  0  0  0  0  0  0  0  0  |
+ *    |  1  0  0  0  0  0  0  0  0  0  |
+ *    |  1  0  0  0  0  0  0  0  0  0  |
+ *    |  1  1  1  1  1  1  1  1  1  1  |
+ *    \  1  1  1  1  1  1  1  1  1  1 /
  */
 
     const IndexType rows = 10;
@@ -640,328 +618,290 @@ void test_SetElement()
 template< typename Matrix >
 void test_VectorProduct()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-    using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
-
-/*
- * Sets up the following 4x4 sparse matrix:
- *
- *    /  1  0  0  0 \
- *    |  0  2  0  3 |
- *    |  0  4  0  0 |
- *    \  0  0  5  0 /
- */
-
-    const IndexType m_rows_1 = 4;
-    const IndexType m_cols_1 = 4;
-
-    Matrix m_1;
-    m_1.reset();
-    m_1.setDimensions( m_rows_1, m_cols_1 );
-    typename Matrix::CompressedRowLengthsVector rowLengths_1;
-    rowLengths_1.setSize( m_rows_1 );
-    rowLengths_1.setElement( 0, 1 );
-    rowLengths_1.setElement( 1, 2 );
-    rowLengths_1.setElement( 2, 1 );
-    rowLengths_1.setElement( 3, 1 );
-    m_1.setCompressedRowLengths( rowLengths_1 );
-
-    m_1.setElement( 0, 0, 1 );      // 0th row
-
-    m_1.setElement( 1, 1, 1 );      // 1st row
-    m_1.setElement( 1, 3, 1 );
-
-    m_1.setElement( 2, 1, 1 );      // 2nd row
-
-    m_1.setElement( 3, 2, 1 );      // 3rd row
-
-    VectorType inVector_1;
-    inVector_1.setSize( m_cols_1 );
-    for( IndexType i = 0; i < inVector_1.getSize(); i++ )
-        inVector_1.setElement( i, 2 );
-
-    VectorType outVector_1;
-    outVector_1.setSize( m_rows_1 );
-    for( IndexType j = 0; j < outVector_1.getSize(); j++ )
-        outVector_1.setElement( j, 0 );
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
 
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  0  0  0 \
+    *    |  0  1  0  1 |
+    *    |  0  1  0  0 |
+    *    \  0  0  1  0 /
+    */
 
-    m_1.vectorProduct( inVector_1, outVector_1 );
+   const IndexType m_rows_1 = 4;
+   const IndexType m_cols_1 = 4;
 
+   Matrix m_1( m_rows_1, m_cols_1 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_1;
+   rowLengths_1.setSize( m_rows_1 );
+   rowLengths_1.setElement( 0, 1 );
+   rowLengths_1.setElement( 1, 2 );
+   rowLengths_1.setElement( 2, 1 );
+   rowLengths_1.setElement( 3, 1 );
+   m_1.setCompressedRowLengths( rowLengths_1 );
 
-    EXPECT_EQ( outVector_1.getElement( 0 ),  2 );
-    EXPECT_EQ( outVector_1.getElement( 1 ), 10 );
-    EXPECT_EQ( outVector_1.getElement( 2 ),  8 );
-    EXPECT_EQ( outVector_1.getElement( 3 ), 10 );
+   m_1.setElement( 0, 0, 1 );      // 0th row
 
+   m_1.setElement( 1, 1, 1 );      // 1st row
+   m_1.setElement( 1, 3, 1 );
+   
+   m_1.setElement( 2, 1, 1 );      // 2nd row
 
-/*
- * Sets up the following 4x4 sparse matrix:
- *
- *    /  1  2  3  0 \
- *    |  0  0  0  4 |
- *    |  5  6  7  0 |
- *    \  0  8  0  0 /
- */
+   m_1.setElement( 3, 2, 1 );      // 3rd row
 
-    const IndexType m_rows_2 = 4;
-    const IndexType m_cols_2 = 4;
+   VectorType inVector_1( m_cols_1 );
+   inVector_1 = 2.0;
 
-    Matrix m_2;
-    m_2.reset();
-    m_2.setDimensions( m_rows_2, m_cols_2 );
-    typename Matrix::CompressedRowLengthsVector rowLengths_2;
-    rowLengths_2.setSize( m_rows_2 );
-    rowLengths_2.setValue( 3 );
-    rowLengths_2.setElement( 1, 1 );
-    rowLengths_2.setElement( 3, 1 );
-    m_2.setCompressedRowLengths( rowLengths_2 );
+   VectorType outVector_1( m_rows_1 );
+   outVector_1 = 0.0;
 
-    for( IndexType i = 0; i < 3; i++ )   // 0th row
-        m_2.setElement( 0, i, 1 );
+   m_1.vectorProduct( inVector_1, outVector_1 );
 
-    m_2.setElement( 1, 3, 1 );      // 1st row
 
-    for( IndexType i = 0; i < 3; i++ )   // 2nd row
-        m_2.setElement( 2, i, 1 );
+   EXPECT_EQ( outVector_1.getElement( 0 ), 2 );
+   EXPECT_EQ( outVector_1.getElement( 1 ), 4 );
+   EXPECT_EQ( outVector_1.getElement( 2 ), 2 );
+   EXPECT_EQ( outVector_1.getElement( 3 ), 2 );
 
-    for( IndexType i = 1; i < 2; i++ )       // 3rd row
-        m_2.setElement( 3, i, 1 );
 
-    VectorType inVector_2;
-    inVector_2.setSize( m_cols_2 );
-    for( IndexType i = 0; i < inVector_2.getSize(); i++ )
-        inVector_2.setElement( i, 2 );
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  1  1  0 \
+    *    |  0  0  0  1 |
+    *    |  1  1  1  0 |
+    *    \  0  1  0  0 /
+    */
+   const IndexType m_rows_2 = 4;
+   const IndexType m_cols_2 = 4;
 
-    VectorType outVector_2;
-    outVector_2.setSize( m_rows_2 );
-    for( IndexType j = 0; j < outVector_2.getSize(); j++ )
-        outVector_2.setElement( j, 0 );
+   Matrix m_2( m_rows_2, m_cols_2 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_2;
+   rowLengths_2.setSize( m_rows_2 );
+   rowLengths_2.setValue( 3 );
+   rowLengths_2.setElement( 1, 1 );
+   rowLengths_2.setElement( 3, 1 );
+   m_2.setCompressedRowLengths( rowLengths_2 );
 
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m_2.setElement( 0, i, 1 );
 
-    m_2.vectorProduct( inVector_2, outVector_2 );
+   m_2.setElement( 1, 3, 1 );      // 1st row
 
+   for( IndexType i = 0; i < 3; i++ )   // 2nd row
+      m_2.setElement( 2, i, 1 );
 
-    EXPECT_EQ( outVector_2.getElement( 0 ), 1 );
-    EXPECT_EQ( outVector_2.getElement( 1 ), 1 );
-    EXPECT_EQ( outVector_2.getElement( 2 ), 1 );
-    EXPECT_EQ( outVector_2.getElement( 3 ), 1 );
+   for( IndexType i = 1; i < 2; i++ )       // 3rd row
+      m_2.setElement( 3, i, 1 );
 
+   VectorType inVector_2( m_cols_2 );
+   inVector_2 = 2.0;
 
-/*
- * Sets up the following 4x4 sparse matrix:
- *
- *    /  1  2  3  0 \
- *    |  0  4  5  6 |
- *    |  7  8  9  0 |
- *    \  0 10 11 12 /
- */
+   VectorType outVector_2( m_rows_2 );
+   outVector_2 = 0.0;
 
-    const IndexType m_rows_3 = 4;
-    const IndexType m_cols_3 = 4;
+   m_2.vectorProduct( inVector_2, outVector_2 );
 
-    Matrix m_3;
-    m_3.reset();
-    m_3.setDimensions( m_rows_3, m_cols_3 );
-    typename Matrix::CompressedRowLengthsVector rowLengths_3;
-    rowLengths_3.setSize( m_rows_3 );
-    rowLengths_3.setValue( 3 );
-    m_3.setCompressedRowLengths( rowLengths_3 );
 
-    for( IndexType i = 0; i < 3; i++ )          // 0th row
-        m_3.setElement( 0, i, 1 );
+   EXPECT_EQ( outVector_2.getElement( 0 ), 6 );
+   EXPECT_EQ( outVector_2.getElement( 1 ), 2 );
+   EXPECT_EQ( outVector_2.getElement( 2 ), 6 );
+   EXPECT_EQ( outVector_2.getElement( 3 ), 2 );
 
-    for( IndexType i = 1; i < 4; i++ )
-        m_3.setElement( 1, i, 1 );      // 1st row
 
-    for( IndexType i = 0; i < 3; i++ )          // 2nd row
-        m_3.setElement( 2, i, 1 );
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  1  1  0 \
+    *    |  0  1  1  1 |
+    *    |  1  1  1  0 |
+    *    \  0  1  1  1 /
+    */
+   const IndexType m_rows_3 = 4;
+   const IndexType m_cols_3 = 4;
 
-    for( IndexType i = 1; i < 4; i++ )          // 3rd row
-        m_3.setElement( 3, i, 1 );
+   Matrix m_3( m_rows_3, m_cols_3 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_3;
+   rowLengths_3.setSize( m_rows_3 );
+   rowLengths_3.setValue( 3 );
+   m_3.setCompressedRowLengths( rowLengths_3 );
 
-    VectorType inVector_3;
-    inVector_3.setSize( m_cols_3 );
-    for( IndexType i = 0; i < inVector_3.getSize(); i++ )
-        inVector_3.setElement( i, 2 );
+   for( IndexType i = 0; i < 3; i++ )          // 0th row
+      m_3.setElement( 0, i, 1 );
 
-    VectorType outVector_3;
-    outVector_3.setSize( m_rows_3 );
-    for( IndexType j = 0; j < outVector_3.getSize(); j++ )
-        outVector_3.setElement( j, 0 );
+   for( IndexType i = 1; i < 4; i++ )
+      m_3.setElement( 1, i, 1 );      // 1st row
 
+   for( IndexType i = 0; i < 3; i++ )          // 2nd row
+      m_3.setElement( 2, i, 1 );
 
-    m_3.vectorProduct( inVector_3, outVector_3 );
+   for( IndexType i = 1; i < 4; i++ )          // 3rd row
+      m_3.setElement( 3, i, 1 );
 
+   VectorType inVector_3( m_cols_3 );
+   inVector_3 = 2.0;
 
-    EXPECT_EQ( outVector_3.getElement( 0 ), 1 );
-    EXPECT_EQ( outVector_3.getElement( 1 ), 1 );
-    EXPECT_EQ( outVector_3.getElement( 2 ), 1 );
-    EXPECT_EQ( outVector_3.getElement( 3 ), 1 );
+   VectorType outVector_3( m_rows_3 );
+   outVector_3 = 0.0;
 
+   m_3.vectorProduct( inVector_3, outVector_3 );
 
-/*
- * Sets up the following 8x8 sparse matrix:
- *
- *    /  1  2  3  0  0  4  0  0 \
- *    |  0  5  6  7  8  0  0  0 |
- *    |  9 10 11 12 13  0  0  0 |
- *    |  0 14 15 16 17  0  0  0 |
- *    |  0  0 18 19 20 21  0  0 |
- *    |  0  0  0 22 23 24 25  0 |
- *    | 26 27 28 29 30  0  0  0 |
- *    \ 31 32 33 34 35  0  0  0 /
- */
 
-    const IndexType m_rows_4 = 8;
-    const IndexType m_cols_4 = 8;
+   EXPECT_EQ( outVector_3.getElement( 0 ), 6 );
+   EXPECT_EQ( outVector_3.getElement( 1 ), 6 );
+   EXPECT_EQ( outVector_3.getElement( 2 ), 6 );
+   EXPECT_EQ( outVector_3.getElement( 3 ), 6 );
 
-    Matrix m_4;
-    m_4.reset();
-    m_4.setDimensions( m_rows_4, m_cols_4 );
-    typename Matrix::CompressedRowLengthsVector rowLengths_4;
-    rowLengths_4.setSize( m_rows_4 );
-    rowLengths_4.setValue( 4 );
-    rowLengths_4.setElement( 2, 5 );
-    rowLengths_4.setElement( 6, 5 );
-    rowLengths_4.setElement( 7, 5 );
-    m_4.setCompressedRowLengths( rowLengths_4 );
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  1  1  0  0  1  0  0 \
+    *    |  0  1  1  1  1  0  0  0 |
+    *    |  1  1  1  1  1  0  0  0 |
+    *    |  0  1  1  1  1  0  0  0 |
+    *    |  0  0  1  1  1  1  0  0 |
+    *    |  0  0  0  1  1  1  1  0 |
+    *    |  1  1  1  1  1  0  0  0 |
+    *    \  1  1  1  1  1  0  0  0 /
+    */
+   const IndexType m_rows_4 = 8;
+   const IndexType m_cols_4 = 8;
 
-    for( IndexType i = 0; i < 3; i++ )       // 0th row
-        m_4.setElement( 0, i, 1 );
+   Matrix m_4( m_rows_4, m_cols_4 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_4;
+   rowLengths_4.setSize( m_rows_4 );
+   rowLengths_4.setValue( 4 );
+   rowLengths_4.setElement( 2, 5 );
+   rowLengths_4.setElement( 6, 5 );
+   rowLengths_4.setElement( 7, 5 );
+   m_4.setCompressedRowLengths( rowLengths_4 );
 
-    m_4.setElement( 0, 5, 1 );
+   for( IndexType i = 0; i < 3; i++ )       // 0th row
+      m_4.setElement( 0, i, 1 );
 
-    for( IndexType i = 1; i < 5; i++ )       // 1st row
-        m_4.setElement( 1, i, 1 );
+   m_4.setElement( 0, 5, 1 );
 
-    for( IndexType i = 0; i < 5; i++ )       // 2nd row
-        m_4.setElement( 2, i, 1 );
+   for( IndexType i = 1; i < 5; i++ )       // 1st row
+      m_4.setElement( 1, i, 1 );
 
-    for( IndexType i = 1; i < 5; i++ )       // 3rd row
-        m_4.setElement( 3, i, 1 );
+   for( IndexType i = 0; i < 5; i++ )       // 2nd row
+      m_4.setElement( 2, i, 1 );
 
-    for( IndexType i = 2; i < 6; i++ )       // 4th row
-        m_4.setElement( 4, i, 1 );
+   for( IndexType i = 1; i < 5; i++ )       // 3rd row
+      m_4.setElement( 3, i, 1 );
 
-    for( IndexType i = 3; i < 7; i++ )       // 5th row
-        m_4.setElement( 5, i, 1 );
+   for( IndexType i = 2; i < 6; i++ )       // 4th row
+      m_4.setElement( 4, i, 1 );
 
-    for( IndexType i = 0; i < 5; i++ )       // 6th row
-        m_4.setElement( 6, i, 1 );
+   for( IndexType i = 3; i < 7; i++ )       // 5th row
+      m_4.setElement( 5, i, 1 );
 
-    for( IndexType i = 0; i < 5; i++ )       // 7th row
-        m_4.setElement( 7, i, 1 );
+   for( IndexType i = 0; i < 5; i++ )       // 6th row
+      m_4.setElement( 6, i, 1 );
 
-    VectorType inVector_4;
-    inVector_4.setSize( m_cols_4 );
-    for( IndexType i = 0; i < inVector_4.getSize(); i++ )
-        inVector_4.setElement( i, 2 );
+   for( IndexType i = 0; i < 5; i++ )       // 7th row
+      m_4.setElement( 7, i, 1 );
 
-    VectorType outVector_4;
-    outVector_4.setSize( m_rows_4 );
-    for( IndexType j = 0; j < outVector_4.getSize(); j++ )
-        outVector_4.setElement( j, 0 );
+   VectorType inVector_4( m_cols_4 );
+   inVector_4 = 2.0;
 
+   VectorType outVector_4( m_rows_4 );
+   outVector_4 = 0.0;
 
-    m_4.vectorProduct( inVector_4, outVector_4 );
+   m_4.vectorProduct( inVector_4, outVector_4 );
 
 
-    EXPECT_EQ( outVector_4.getElement( 0 ),  20 );
-    EXPECT_EQ( outVector_4.getElement( 1 ),  52 );
-    EXPECT_EQ( outVector_4.getElement( 2 ), 110 );
-    EXPECT_EQ( outVector_4.getElement( 3 ), 124 );
-    EXPECT_EQ( outVector_4.getElement( 4 ), 156 );
-    EXPECT_EQ( outVector_4.getElement( 5 ), 188 );
-    EXPECT_EQ( outVector_4.getElement( 6 ), 280 );
-    EXPECT_EQ( outVector_4.getElement( 7 ), 330 );
+   EXPECT_EQ( outVector_4.getElement( 0 ),  8 );
+   EXPECT_EQ( outVector_4.getElement( 1 ),  8 );
+   EXPECT_EQ( outVector_4.getElement( 2 ), 10 );
+   EXPECT_EQ( outVector_4.getElement( 3 ),  8 );
+   EXPECT_EQ( outVector_4.getElement( 4 ),  8 );
+   EXPECT_EQ( outVector_4.getElement( 5 ),  8 );
+   EXPECT_EQ( outVector_4.getElement( 6 ), 10 );
+   EXPECT_EQ( outVector_4.getElement( 7 ), 10 );
 
 
    /*
     * Sets up the following 8x8 sparse matrix:
     *
-    *    /  1  2  3  0  4  5  0  1 \   6
-    *    |  0  6  0  7  0  0  0  1 |   3
-    *    |  0  8  9  0 10  0  0  1 |   4
-    *    |  0 11 12 13 14  0  0  1 |   5
-    *    |  0 15  0  0  0  0  0  1 |   2
-    *    |  0 16 17 18 19 20 21  1 |   7
-    *    | 22 23 24 25 26 27 28  1 |   8
-    *    \ 29 30 31 32 33 34 35 36 /   8
+    *    /  1  1  1  0  1  1  0  1 \   6
+    *    |  0  1  0  1  0  0  0  1 |   3
+    *    |  0  1  1  0  1  0  0  1 |   4
+    *    |  0  1  1  1  1  0  0  1 |   5
+    *    |  0  1  0  0  0  0  0  1 |   2
+    *    |  0  1  1  1  1  1  1  1 |   7
+    *    |  1  1  1  1  1  1  1  1 |   8
+    *    \  1  1  1  1  1  1  1  1 /   8
     */
 
-    const IndexType m_rows_5 = 8;
-    const IndexType m_cols_5 = 8;
-
-    Matrix m_5;
-    m_5.reset();
-    m_5.setDimensions( m_rows_5, m_cols_5 );
-    typename Matrix::CompressedRowLengthsVector rowLengths_5;
-    rowLengths_5.setSize( m_rows_5 );
-    rowLengths_5.setElement(0, 6);
-    rowLengths_5.setElement(1, 3);
-    rowLengths_5.setElement(2, 4);
-    rowLengths_5.setElement(3, 5);
-    rowLengths_5.setElement(4, 2);
-    rowLengths_5.setElement(5, 7);
-    rowLengths_5.setElement(6, 8);
-    rowLengths_5.setElement(7, 8);
-    m_5.setCompressedRowLengths( rowLengths_5 );
-
-    RealType value_5 = 1;
-    for( IndexType i = 0; i < 3; i++ )   // 0th row
-        m_5.setElement( 0, i, 1 );
+   const IndexType m_rows_5 = 8;
+   const IndexType m_cols_5 = 8;
+
+   Matrix m_5( m_rows_5, m_cols_5 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_5;
+   rowLengths_5.setSize( m_rows_5 );
+   rowLengths_5.setElement(0, 6);
+   rowLengths_5.setElement(1, 3);
+   rowLengths_5.setElement(2, 4);
+   rowLengths_5.setElement(3, 5);
+   rowLengths_5.setElement(4, 2);
+   rowLengths_5.setElement(5, 7);
+   rowLengths_5.setElement(6, 8);
+   rowLengths_5.setElement(7, 8);
+   m_5.setCompressedRowLengths( rowLengths_5 );
+
+   RealType value_5 = 1;
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m_5.setElement( 0, i, 1 );
 
-    m_5.setElement( 0, 4, 1 );           // 0th row
-    m_5.setElement( 0, 5, 1 );
+   m_5.setElement( 0, 4, 1 );           // 0th row
+   m_5.setElement( 0, 5, 1 );
 
-    m_5.setElement( 1, 1, 1 );           // 1st row
-    m_5.setElement( 1, 3, 1 );
+   m_5.setElement( 1, 1, 1 );           // 1st row
+   m_5.setElement( 1, 3, 1 );
 
-    for( IndexType i = 1; i < 3; i++ )            // 2nd row
-        m_5.setElement( 2, i, 1 );
+   for( IndexType i = 1; i < 3; i++ )            // 2nd row
+      m_5.setElement( 2, i, 1 );
 
-    m_5.setElement( 2, 4, 1 );           // 2nd row
+   m_5.setElement( 2, 4, 1 );           // 2nd row
 
-    for( IndexType i = 1; i < 5; i++ )            // 3rd row
-        m_5.setElement( 3, i, 1 );
+   for( IndexType i = 1; i < 5; i++ )            // 3rd row
+      m_5.setElement( 3, i, 1 );
 
-    m_5.setElement( 4, 1, 1 );           // 4th row
+   m_5.setElement( 4, 1, 1 );           // 4th row
 
-    for( IndexType i = 1; i < 7; i++ )            // 5th row
-        m_5.setElement( 5, i, 1 );
+   for( IndexType i = 1; i < 7; i++ )            // 5th row
+      m_5.setElement( 5, i, 1 );
 
-    for( IndexType i = 0; i < 7; i++ )            // 6th row
-        m_5.setElement( 6, i, 1 );
+   for( IndexType i = 0; i < 7; i++ )            // 6th row
+      m_5.setElement( 6, i, 1 );
 
-    for( IndexType i = 0; i < 8; i++ )            // 7th row
-        m_5.setElement( 7, i, 1 );
+   for( IndexType i = 0; i < 8; i++ )            // 7th row
+      m_5.setElement( 7, i, 1 );
 
-    for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
-        m_5.setElement( i, 7, 1);
+   for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
+      m_5.setElement( i, 7, 1);
 
-    VectorType inVector_5;
-    inVector_5.setSize( m_cols_5 );
-    for( IndexType i = 0; i < inVector_5.getSize(); i++ )
-        inVector_5.setElement( i, 2 );
+   VectorType inVector_5( m_cols_5 );
+   inVector_5 = 2.0;
 
-    VectorType outVector_5;
-    outVector_5.setSize( m_rows_5 );
-    for( IndexType j = 0; j < outVector_5.getSize(); j++ )
-        outVector_5.setElement( j, 0 );
+   VectorType outVector_5( m_rows_5 );
+   outVector_5 = 0.0;
 
-    m_5.vectorProduct( inVector_5, outVector_5 );
+   m_5.vectorProduct( inVector_5, outVector_5 );
 
-    EXPECT_EQ( outVector_5.getElement( 0 ), 1 );
-    EXPECT_EQ( outVector_5.getElement( 1 ), 1 );
-    EXPECT_EQ( outVector_5.getElement( 2 ), 1 );
-    EXPECT_EQ( outVector_5.getElement( 3 ), 1 );
-    EXPECT_EQ( outVector_5.getElement( 4 ), 1 );
-    EXPECT_EQ( outVector_5.getElement( 5 ), 1 );
-    EXPECT_EQ( outVector_5.getElement( 6 ), 1 );
-    EXPECT_EQ( outVector_5.getElement( 7 ), 1 );
+   EXPECT_EQ( outVector_5.getElement( 0 ), 12 );
+   EXPECT_EQ( outVector_5.getElement( 1 ),  6 );
+   EXPECT_EQ( outVector_5.getElement( 2 ),  8 );
+   EXPECT_EQ( outVector_5.getElement( 3 ), 10 );
+   EXPECT_EQ( outVector_5.getElement( 4 ),  4 );
+   EXPECT_EQ( outVector_5.getElement( 5 ), 14 );
+   EXPECT_EQ( outVector_5.getElement( 6 ), 16 );
+   EXPECT_EQ( outVector_5.getElement( 7 ), 16 );
 }
 
 template< typename Matrix >
@@ -974,21 +914,20 @@ void test_RowsReduction()
    /*
     * Sets up the following 8x8 sparse matrix:
     *
-    *    /  1  2  3  0  4  5  0  1 \   6
-    *    |  0  6  0  7  0  0  0  1 |   3
-    *    |  0  8  9  0 10  0  0  1 |   4
-    *    |  0 11 12 13 14  0  0  1 |   5
-    *    |  0 15  0  0  0  0  0  1 |   2
-    *    |  0 16 17 18 19 20 21  1 |   7
-    *    | 22 23 24 25 26 27 28  1 |   8
-    *    \ 29 30 31 32 33 34 35 36 /   8
+    *    /  1  1  1  0  1  1  0  1 \   6
+    *    |  0  1  0  1  0  0  0  1 |   3
+    *    |  0  1  1  0  1  0  0  1 |   4
+    *    |  0  1  1  1  1  0  0  1 |   5
+    *    |  0  1  0  0  0  0  0  1 |   2
+    *    |  0  1  1  1  1  1  1  1 |   7
+    *    |  1  1  1  1  1  1  1  1 |   8
+    *    \  1  1  1  1  1  1  1  1 /   8
     */
 
    const IndexType rows = 8;
    const IndexType cols = 8;
 
-   Matrix m;
-   m.setDimensions( rows, cols );
+   Matrix m( rows, cols );
    typename Matrix::RowsCapacitiesType rowsCapacities( rows );
    //rowLengths.setSize( rows );
    rowsCapacities.setElement(0, 6);
@@ -1065,87 +1004,85 @@ void test_RowsReduction()
    };
    m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 );
    const RealType maxNorm = TNL::max( rowSums );
-   EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36
+   EXPECT_EQ( maxNorm, 8 ) ; // 29+30+31+32+33+34+35+36
 }
 
 template< typename Matrix >
 void test_PerformSORIteration()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-/*
- * Sets up the following 4x4 sparse matrix:
- *
- *    /  4  1  0  0 \
- *    |  1  4  1  0 |
- *    |  0  1  4  1 |
- *    \  0  0  1  4 /
- */
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  1  0  0 \
+    *    |  1  1  1  0 |
+    *    |  0  1  1  1 |
+    *    \  0  0  1  1 /
+    */
 
-    const IndexType m_rows = 4;
-    const IndexType m_cols = 4;
+   const IndexType m_rows = 4;
+   const IndexType m_cols = 4;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( m_rows, m_cols );
-    typename Matrix::CompressedRowLengthsVector rowLengths;
-    rowLengths.setSize( m_rows );
-    rowLengths.setValue( 3 );
-    m.setCompressedRowLengths( rowLengths );
+   Matrix m( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( m_rows );
+   rowLengths.setValue( 3 );
+   m.setCompressedRowLengths( rowLengths );
 
-    m.setElement( 0, 0, 4.0 );        // 0th row
-    m.setElement( 0, 1, 1.0);
+   m.setElement( 0, 0, 4.0 );        // 0th row
+   m.setElement( 0, 1, 1.0);
 
-    m.setElement( 1, 0, 1.0 );        // 1st row
-    m.setElement( 1, 1, 4.0 );
-    m.setElement( 1, 2, 1.0 );
+   m.setElement( 1, 0, 1.0 );        // 1st row
+   m.setElement( 1, 1, 4.0 );
+   m.setElement( 1, 2, 1.0 );
 
-    m.setElement( 2, 1, 1.0 );        // 2nd row
-    m.setElement( 2, 2, 4.0 );
-    m.setElement( 2, 3, 1.0 );
+   m.setElement( 2, 1, 1.0 );        // 2nd row
+   m.setElement( 2, 2, 4.0 );
+   m.setElement( 2, 3, 1.0 );
 
-    m.setElement( 3, 2, 1.0 );        // 3rd row
-    m.setElement( 3, 3, 4.0 );
+   m.setElement( 3, 2, 1.0 );        // 3rd row
+   m.setElement( 3, 3, 4.0 );
 
-    RealType bVector [ 4 ] = { 1, 1, 1, 1 };
-    RealType xVector [ 4 ] = { 1, 1, 1, 1 };
+   RealType bVector [ 4 ] = { 1, 1, 1, 1 };
+   RealType xVector [ 4 ] = { 1, 1, 1, 1 };
 
-    IndexType row = 0;
-    RealType omega = 1;
+   IndexType row = 0;
+   RealType omega = 1;
 
 
-    m.performSORIteration( bVector, row++, xVector, omega);
+   m.performSORIteration( bVector, row++, xVector, omega);
 
-    EXPECT_EQ( xVector[ 0 ], 0.0 );
-    EXPECT_EQ( xVector[ 1 ], 1.0 );
-    EXPECT_EQ( xVector[ 2 ], 1.0 );
-    EXPECT_EQ( xVector[ 3 ], 1.0 );
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 1.0 );
+   EXPECT_EQ( xVector[ 2 ], 1.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
 
 
-    m.performSORIteration( bVector, row++, xVector, omega);
+   m.performSORIteration( bVector, row++, xVector, omega);
 
-    EXPECT_EQ( xVector[ 0 ], 0.0 );
-    EXPECT_EQ( xVector[ 1 ], 0.0 );
-    EXPECT_EQ( xVector[ 2 ], 1.0 );
-    EXPECT_EQ( xVector[ 3 ], 1.0 );
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 1.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
 
 
-    m.performSORIteration( bVector, row++, xVector, omega);
+   m.performSORIteration( bVector, row++, xVector, omega);
 
-    EXPECT_EQ( xVector[ 0 ], 0.0 );
-    EXPECT_EQ( xVector[ 1 ], 0.0 );
-    EXPECT_EQ( xVector[ 2 ], 0.0 );
-    EXPECT_EQ( xVector[ 3 ], 1.0 );
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 0.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
 
 
-    m.performSORIteration( bVector, row++, xVector, omega);
+   m.performSORIteration( bVector, row++, xVector, omega);
 
-    EXPECT_EQ( xVector[ 0 ], 0.0 );
-    EXPECT_EQ( xVector[ 1 ], 0.0 );
-    EXPECT_EQ( xVector[ 2 ], 0.0 );
-    EXPECT_EQ( xVector[ 3 ], 0.25 );
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 0.0 );
+   EXPECT_EQ( xVector[ 3 ], 0.25 );
 }
 
 // This test is only for AdEllpack
@@ -1157,256 +1094,256 @@ void test_OperatorEquals()
    using IndexType = typename Matrix::IndexType;
 
    if( std::is_same< DeviceType, TNL::Devices::Cuda >::value )
-       return;
+      return;
    else
    {
-       using AdELL_host = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Host, IndexType >;
-       using AdELL_cuda = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Cuda, IndexType >;
-
-       /*
-        * Sets up the following 8x8 sparse matrix:
-        *
-        *    /  1  2  3  0  4  5  0  1 \   6
-        *    |  0  6  0  7  0  0  0  1 |   3
-        *    |  0  8  9  0 10  0  0  1 |   4
-        *    |  0 11 12 13 14  0  0  1 |   5
-        *    |  0 15  0  0  0  0  0  1 |   2
-        *    |  0 16 17 18 19 20 21  1 |   7
-        *    | 22 23 24 25 26 27 28  1 |   8
-        *    \ 29 30 31 32 33 34 35 36 /   8
-        */
-
-        const IndexType m_rows = 8;
-        const IndexType m_cols = 8;
-
-        AdELL_host m_host;
-
-        m_host.reset();
-        m_host.setDimensions( m_rows, m_cols );
-        typename AdELL_host::CompressedRowLengthsVector rowLengths;
-        rowLengths.setSize( m_rows );
-        rowLengths.setElement(0, 6);
-        rowLengths.setElement(1, 3);
-        rowLengths.setElement(2, 4);
-        rowLengths.setElement(3, 5);
-        rowLengths.setElement(4, 2);
-        rowLengths.setElement(5, 7);
-        rowLengths.setElement(6, 8);
-        rowLengths.setElement(7, 8);
-        m_host.setCompressedRowLengths( rowLengths );
-
-        RealType value = 1;
-        for( IndexType i = 0; i < 3; i++ )   // 0th row
-            m_host.setElement( 0, i, value++ );
-
-        m_host.setElement( 0, 4, value++ );           // 0th row
-        m_host.setElement( 0, 5, value++ );
-
-        m_host.setElement( 1, 1, value++ );           // 1st row
-        m_host.setElement( 1, 3, value++ );
-
-        for( IndexType i = 1; i < 3; i++ )            // 2nd row
-            m_host.setElement( 2, i, value++ );
-
-        m_host.setElement( 2, 4, value++ );           // 2nd row
-
-
-        for( IndexType i = 1; i < 5; i++ )            // 3rd row
-            m_host.setElement( 3, i, value++ );
-
-        m_host.setElement( 4, 1, value++ );           // 4th row
-
-        for( IndexType i = 1; i < 7; i++ )            // 5th row
-            m_host.setElement( 5, i, value++ );
-
-        for( IndexType i = 0; i < 7; i++ )            // 6th row
-            m_host.setElement( 6, i, value++ );
-
-        for( IndexType i = 0; i < 8; i++ )            // 7th row
-            m_host.setElement( 7, i, value++ );
-
-        for( IndexType i = 0; i < 7; i++ )            // 1s at the end or rows: 5, 6
-            m_host.setElement( i, 7, 1);
-
-        EXPECT_EQ( m_host.getElement( 0, 0 ),  1 );
-        EXPECT_EQ( m_host.getElement( 0, 1 ),  2 );
-        EXPECT_EQ( m_host.getElement( 0, 2 ),  3 );
-        EXPECT_EQ( m_host.getElement( 0, 3 ),  0 );
-        EXPECT_EQ( m_host.getElement( 0, 4 ),  4 );
-        EXPECT_EQ( m_host.getElement( 0, 5 ),  5 );
-        EXPECT_EQ( m_host.getElement( 0, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 0, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 1, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 1 ),  6 );
-        EXPECT_EQ( m_host.getElement( 1, 2 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 3 ),  7 );
-        EXPECT_EQ( m_host.getElement( 1, 4 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 5 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 2, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 2, 1 ),  8 );
-        EXPECT_EQ( m_host.getElement( 2, 2 ),  9 );
-        EXPECT_EQ( m_host.getElement( 2, 3 ),  0 );
-        EXPECT_EQ( m_host.getElement( 2, 4 ), 10 );
-        EXPECT_EQ( m_host.getElement( 2, 5 ),  0 );
-        EXPECT_EQ( m_host.getElement( 2, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 2, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 3, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 3, 1 ), 11 );
-        EXPECT_EQ( m_host.getElement( 3, 2 ), 12 );
-        EXPECT_EQ( m_host.getElement( 3, 3 ), 13 );
-        EXPECT_EQ( m_host.getElement( 3, 4 ), 14 );
-        EXPECT_EQ( m_host.getElement( 3, 5 ),  0 );
-        EXPECT_EQ( m_host.getElement( 3, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 3, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 4, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 1 ), 15 );
-        EXPECT_EQ( m_host.getElement( 4, 2 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 3 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 4 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 5 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 5, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 5, 1 ), 16 );
-        EXPECT_EQ( m_host.getElement( 5, 2 ), 17 );
-        EXPECT_EQ( m_host.getElement( 5, 3 ), 18 );
-        EXPECT_EQ( m_host.getElement( 5, 4 ), 19 );
-        EXPECT_EQ( m_host.getElement( 5, 5 ), 20 );
-        EXPECT_EQ( m_host.getElement( 5, 6 ), 21 );
-        EXPECT_EQ( m_host.getElement( 5, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 6, 0 ), 22 );
-        EXPECT_EQ( m_host.getElement( 6, 1 ), 23 );
-        EXPECT_EQ( m_host.getElement( 6, 2 ), 24 );
-        EXPECT_EQ( m_host.getElement( 6, 3 ), 25 );
-        EXPECT_EQ( m_host.getElement( 6, 4 ), 26 );
-        EXPECT_EQ( m_host.getElement( 6, 5 ), 27 );
-        EXPECT_EQ( m_host.getElement( 6, 6 ), 28 );
-        EXPECT_EQ( m_host.getElement( 6, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 7, 0 ), 29 );
-        EXPECT_EQ( m_host.getElement( 7, 1 ), 30 );
-        EXPECT_EQ( m_host.getElement( 7, 2 ), 31 );
-        EXPECT_EQ( m_host.getElement( 7, 3 ), 32 );
-        EXPECT_EQ( m_host.getElement( 7, 4 ), 33 );
-        EXPECT_EQ( m_host.getElement( 7, 5 ), 34 );
-        EXPECT_EQ( m_host.getElement( 7, 6 ), 35 );
-        EXPECT_EQ( m_host.getElement( 7, 7 ), 36 );
-
-        AdELL_cuda m_cuda;
-
-        // Copy the host matrix into the cuda matrix
-        m_cuda = m_host;
-
-        // Reset the host matrix
-        m_host.reset();
-
-        // Copy the cuda matrix back into the host matrix
-        m_host = m_cuda;
-
-        // Check the newly created double-copy host matrix
-        EXPECT_EQ( m_host.getElement( 0, 0 ),  1 );
-        EXPECT_EQ( m_host.getElement( 0, 1 ),  2 );
-        EXPECT_EQ( m_host.getElement( 0, 2 ),  3 );
-        EXPECT_EQ( m_host.getElement( 0, 3 ),  0 );
-        EXPECT_EQ( m_host.getElement( 0, 4 ),  4 );
-        EXPECT_EQ( m_host.getElement( 0, 5 ),  5 );
-        EXPECT_EQ( m_host.getElement( 0, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 0, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 1, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 1 ),  6 );
-        EXPECT_EQ( m_host.getElement( 1, 2 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 3 ),  7 );
-        EXPECT_EQ( m_host.getElement( 1, 4 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 5 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 2, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 2, 1 ),  8 );
-        EXPECT_EQ( m_host.getElement( 2, 2 ),  9 );
-        EXPECT_EQ( m_host.getElement( 2, 3 ),  0 );
-        EXPECT_EQ( m_host.getElement( 2, 4 ), 10 );
-        EXPECT_EQ( m_host.getElement( 2, 5 ),  0 );
-        EXPECT_EQ( m_host.getElement( 2, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 2, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 3, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 3, 1 ), 11 );
-        EXPECT_EQ( m_host.getElement( 3, 2 ), 12 );
-        EXPECT_EQ( m_host.getElement( 3, 3 ), 13 );
-        EXPECT_EQ( m_host.getElement( 3, 4 ), 14 );
-        EXPECT_EQ( m_host.getElement( 3, 5 ),  0 );
-        EXPECT_EQ( m_host.getElement( 3, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 3, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 4, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 1 ), 15 );
-        EXPECT_EQ( m_host.getElement( 4, 2 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 3 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 4 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 5 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 5, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 5, 1 ), 16 );
-        EXPECT_EQ( m_host.getElement( 5, 2 ), 17 );
-        EXPECT_EQ( m_host.getElement( 5, 3 ), 18 );
-        EXPECT_EQ( m_host.getElement( 5, 4 ), 19 );
-        EXPECT_EQ( m_host.getElement( 5, 5 ), 20 );
-        EXPECT_EQ( m_host.getElement( 5, 6 ), 21 );
-        EXPECT_EQ( m_host.getElement( 5, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 6, 0 ), 22 );
-        EXPECT_EQ( m_host.getElement( 6, 1 ), 23 );
-        EXPECT_EQ( m_host.getElement( 6, 2 ), 24 );
-        EXPECT_EQ( m_host.getElement( 6, 3 ), 25 );
-        EXPECT_EQ( m_host.getElement( 6, 4 ), 26 );
-        EXPECT_EQ( m_host.getElement( 6, 5 ), 27 );
-        EXPECT_EQ( m_host.getElement( 6, 6 ), 28 );
-        EXPECT_EQ( m_host.getElement( 6, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 7, 0 ), 29 );
-        EXPECT_EQ( m_host.getElement( 7, 1 ), 30 );
-        EXPECT_EQ( m_host.getElement( 7, 2 ), 31 );
-        EXPECT_EQ( m_host.getElement( 7, 3 ), 32 );
-        EXPECT_EQ( m_host.getElement( 7, 4 ), 33 );
-        EXPECT_EQ( m_host.getElement( 7, 5 ), 34 );
-        EXPECT_EQ( m_host.getElement( 7, 6 ), 35 );
-        EXPECT_EQ( m_host.getElement( 7, 7 ), 36 );
-
-        // Try vectorProduct with copied cuda matrix to see if it works correctly.
-        using VectorType = TNL::Containers::Vector< RealType, TNL::Devices::Cuda, IndexType >;
-
-        VectorType inVector;
-        inVector.setSize( m_cols );
-        for( IndexType i = 0; i < inVector.getSize(); i++ )
-            inVector.setElement( i, 2 );
-
-        VectorType outVector;
-        outVector.setSize( m_rows );
-        for( IndexType j = 0; j < outVector.getSize(); j++ )
-            outVector.setElement( j, 0 );
-
-        m_cuda.vectorProduct( inVector, outVector );
-
-        EXPECT_EQ( outVector.getElement( 0 ),  32 );
-        EXPECT_EQ( outVector.getElement( 1 ),  28 );
-        EXPECT_EQ( outVector.getElement( 2 ),  56 );
-        EXPECT_EQ( outVector.getElement( 3 ), 102 );
-        EXPECT_EQ( outVector.getElement( 4 ),  32 );
-        EXPECT_EQ( outVector.getElement( 5 ), 224 );
-        EXPECT_EQ( outVector.getElement( 6 ), 352 );
-        EXPECT_EQ( outVector.getElement( 7 ), 520 );
+      using AdELL_host = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Host, IndexType >;
+      using AdELL_cuda = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Cuda, IndexType >;
+
+      /*
+       * Sets up the following 8x8 sparse matrix:
+       *
+       *    /  1  2  3  0  4  5  0  1 \   6
+       *    |  0  6  0  7  0  0  0  1 |   3
+       *    |  0  8  9  0 10  0  0  1 |   4
+       *    |  0 11 12 13 14  0  0  1 |   5
+       *    |  0 15  0  0  0  0  0  1 |   2
+       *    |  0 16 17 18 19 20 21  1 |   7
+       *    | 22 23 24 25 26 27 28  1 |   8
+       *    \ 29 30 31 32 33 34 35 36 /   8
+       */
+
+      const IndexType m_rows = 8;
+      const IndexType m_cols = 8;
+
+      AdELL_host m_host;
+
+      m_host.reset();
+      m_host.setDimensions( m_rows, m_cols );
+      typename AdELL_host::CompressedRowLengthsVector rowLengths;
+      rowLengths.setSize( m_rows );
+      rowLengths.setElement(0, 6);
+      rowLengths.setElement(1, 3);
+      rowLengths.setElement(2, 4);
+      rowLengths.setElement(3, 5);
+      rowLengths.setElement(4, 2);
+      rowLengths.setElement(5, 7);
+      rowLengths.setElement(6, 8);
+      rowLengths.setElement(7, 8);
+      m_host.setCompressedRowLengths( rowLengths );
+
+      RealType value = 1;
+      for( IndexType i = 0; i < 3; i++ )   // 0th row
+          m_host.setElement( 0, i, value++ );
+
+      m_host.setElement( 0, 4, value++ );           // 0th row
+      m_host.setElement( 0, 5, value++ );
+
+      m_host.setElement( 1, 1, value++ );           // 1st row
+      m_host.setElement( 1, 3, value++ );
+
+      for( IndexType i = 1; i < 3; i++ )            // 2nd row
+          m_host.setElement( 2, i, value++ );
+
+      m_host.setElement( 2, 4, value++ );           // 2nd row
+
+
+      for( IndexType i = 1; i < 5; i++ )            // 3rd row
+          m_host.setElement( 3, i, value++ );
+
+      m_host.setElement( 4, 1, value++ );           // 4th row
+
+      for( IndexType i = 1; i < 7; i++ )            // 5th row
+          m_host.setElement( 5, i, value++ );
+
+      for( IndexType i = 0; i < 7; i++ )            // 6th row
+          m_host.setElement( 6, i, value++ );
+
+      for( IndexType i = 0; i < 8; i++ )            // 7th row
+          m_host.setElement( 7, i, value++ );
+
+      for( IndexType i = 0; i < 7; i++ )            // 1s at the end or rows: 5, 6
+          m_host.setElement( i, 7, 1);
+
+      EXPECT_EQ( m_host.getElement( 0, 0 ),  1 );
+      EXPECT_EQ( m_host.getElement( 0, 1 ),  2 );
+      EXPECT_EQ( m_host.getElement( 0, 2 ),  3 );
+      EXPECT_EQ( m_host.getElement( 0, 3 ),  0 );
+      EXPECT_EQ( m_host.getElement( 0, 4 ),  4 );
+      EXPECT_EQ( m_host.getElement( 0, 5 ),  5 );
+      EXPECT_EQ( m_host.getElement( 0, 6 ),  0 );
+      EXPECT_EQ( m_host.getElement( 0, 7 ),  1 );
+
+      EXPECT_EQ( m_host.getElement( 1, 0 ),  0 );
+      EXPECT_EQ( m_host.getElement( 1, 1 ),  6 );
+      EXPECT_EQ( m_host.getElement( 1, 2 ),  0 );
+      EXPECT_EQ( m_host.getElement( 1, 3 ),  7 );
+      EXPECT_EQ( m_host.getElement( 1, 4 ),  0 );
+      EXPECT_EQ( m_host.getElement( 1, 5 ),  0 );
+      EXPECT_EQ( m_host.getElement( 1, 6 ),  0 );
+      EXPECT_EQ( m_host.getElement( 1, 7 ),  1 );
+
+      EXPECT_EQ( m_host.getElement( 2, 0 ),  0 );
+      EXPECT_EQ( m_host.getElement( 2, 1 ),  8 );
+      EXPECT_EQ( m_host.getElement( 2, 2 ),  9 );
+      EXPECT_EQ( m_host.getElement( 2, 3 ),  0 );
+      EXPECT_EQ( m_host.getElement( 2, 4 ), 10 );
+      EXPECT_EQ( m_host.getElement( 2, 5 ),  0 );
+      EXPECT_EQ( m_host.getElement( 2, 6 ),  0 );
+      EXPECT_EQ( m_host.getElement( 2, 7 ),  1 );
+
+      EXPECT_EQ( m_host.getElement( 3, 0 ),  0 );
+      EXPECT_EQ( m_host.getElement( 3, 1 ), 11 );
+      EXPECT_EQ( m_host.getElement( 3, 2 ), 12 );
+      EXPECT_EQ( m_host.getElement( 3, 3 ), 13 );
+      EXPECT_EQ( m_host.getElement( 3, 4 ), 14 );
+      EXPECT_EQ( m_host.getElement( 3, 5 ),  0 );
+      EXPECT_EQ( m_host.getElement( 3, 6 ),  0 );
+      EXPECT_EQ( m_host.getElement( 3, 7 ),  1 );
+
+      EXPECT_EQ( m_host.getElement( 4, 0 ),  0 );
+      EXPECT_EQ( m_host.getElement( 4, 1 ), 15 );
+      EXPECT_EQ( m_host.getElement( 4, 2 ),  0 );
+      EXPECT_EQ( m_host.getElement( 4, 3 ),  0 );
+      EXPECT_EQ( m_host.getElement( 4, 4 ),  0 );
+      EXPECT_EQ( m_host.getElement( 4, 5 ),  0 );
+      EXPECT_EQ( m_host.getElement( 4, 6 ),  0 );
+      EXPECT_EQ( m_host.getElement( 4, 7 ),  1 );
+
+      EXPECT_EQ( m_host.getElement( 5, 0 ),  0 );
+      EXPECT_EQ( m_host.getElement( 5, 1 ), 16 );
+      EXPECT_EQ( m_host.getElement( 5, 2 ), 17 );
+      EXPECT_EQ( m_host.getElement( 5, 3 ), 18 );
+      EXPECT_EQ( m_host.getElement( 5, 4 ), 19 );
+      EXPECT_EQ( m_host.getElement( 5, 5 ), 20 );
+      EXPECT_EQ( m_host.getElement( 5, 6 ), 21 );
+      EXPECT_EQ( m_host.getElement( 5, 7 ),  1 );
+
+      EXPECT_EQ( m_host.getElement( 6, 0 ), 22 );
+      EXPECT_EQ( m_host.getElement( 6, 1 ), 23 );
+      EXPECT_EQ( m_host.getElement( 6, 2 ), 24 );
+      EXPECT_EQ( m_host.getElement( 6, 3 ), 25 );
+      EXPECT_EQ( m_host.getElement( 6, 4 ), 26 );
+      EXPECT_EQ( m_host.getElement( 6, 5 ), 27 );
+      EXPECT_EQ( m_host.getElement( 6, 6 ), 28 );
+      EXPECT_EQ( m_host.getElement( 6, 7 ),  1 );
+
+      EXPECT_EQ( m_host.getElement( 7, 0 ), 29 );
+      EXPECT_EQ( m_host.getElement( 7, 1 ), 30 );
+      EXPECT_EQ( m_host.getElement( 7, 2 ), 31 );
+      EXPECT_EQ( m_host.getElement( 7, 3 ), 32 );
+      EXPECT_EQ( m_host.getElement( 7, 4 ), 33 );
+      EXPECT_EQ( m_host.getElement( 7, 5 ), 34 );
+      EXPECT_EQ( m_host.getElement( 7, 6 ), 35 );
+      EXPECT_EQ( m_host.getElement( 7, 7 ), 36 );
+
+      AdELL_cuda m_cuda;
+
+      // Copy the host matrix into the cuda matrix
+      m_cuda = m_host;
+
+      // Reset the host matrix
+      m_host.reset();
+
+      // Copy the cuda matrix back into the host matrix
+      m_host = m_cuda;
+
+      // Check the newly created double-copy host matrix
+      EXPECT_EQ( m_host.getElement( 0, 0 ),  1 );
+      EXPECT_EQ( m_host.getElement( 0, 1 ),  2 );
+      EXPECT_EQ( m_host.getElement( 0, 2 ),  3 );
+      EXPECT_EQ( m_host.getElement( 0, 3 ),  0 );
+      EXPECT_EQ( m_host.getElement( 0, 4 ),  4 );
+      EXPECT_EQ( m_host.getElement( 0, 5 ),  5 );
+      EXPECT_EQ( m_host.getElement( 0, 6 ),  0 );
+      EXPECT_EQ( m_host.getElement( 0, 7 ),  1 );
+
+      EXPECT_EQ( m_host.getElement( 1, 0 ),  0 );
+      EXPECT_EQ( m_host.getElement( 1, 1 ),  6 );
+      EXPECT_EQ( m_host.getElement( 1, 2 ),  0 );
+      EXPECT_EQ( m_host.getElement( 1, 3 ),  7 );
+      EXPECT_EQ( m_host.getElement( 1, 4 ),  0 );
+      EXPECT_EQ( m_host.getElement( 1, 5 ),  0 );
+      EXPECT_EQ( m_host.getElement( 1, 6 ),  0 );
+      EXPECT_EQ( m_host.getElement( 1, 7 ),  1 );
+
+      EXPECT_EQ( m_host.getElement( 2, 0 ),  0 );
+      EXPECT_EQ( m_host.getElement( 2, 1 ),  8 );
+      EXPECT_EQ( m_host.getElement( 2, 2 ),  9 );
+      EXPECT_EQ( m_host.getElement( 2, 3 ),  0 );
+      EXPECT_EQ( m_host.getElement( 2, 4 ), 10 );
+      EXPECT_EQ( m_host.getElement( 2, 5 ),  0 );
+      EXPECT_EQ( m_host.getElement( 2, 6 ),  0 );
+      EXPECT_EQ( m_host.getElement( 2, 7 ),  1 );
+
+      EXPECT_EQ( m_host.getElement( 3, 0 ),  0 );
+      EXPECT_EQ( m_host.getElement( 3, 1 ), 11 );
+      EXPECT_EQ( m_host.getElement( 3, 2 ), 12 );
+      EXPECT_EQ( m_host.getElement( 3, 3 ), 13 );
+      EXPECT_EQ( m_host.getElement( 3, 4 ), 14 );
+      EXPECT_EQ( m_host.getElement( 3, 5 ),  0 );
+      EXPECT_EQ( m_host.getElement( 3, 6 ),  0 );
+      EXPECT_EQ( m_host.getElement( 3, 7 ),  1 );
+
+      EXPECT_EQ( m_host.getElement( 4, 0 ),  0 );
+      EXPECT_EQ( m_host.getElement( 4, 1 ), 15 );
+      EXPECT_EQ( m_host.getElement( 4, 2 ),  0 );
+      EXPECT_EQ( m_host.getElement( 4, 3 ),  0 );
+      EXPECT_EQ( m_host.getElement( 4, 4 ),  0 );
+      EXPECT_EQ( m_host.getElement( 4, 5 ),  0 );
+      EXPECT_EQ( m_host.getElement( 4, 6 ),  0 );
+      EXPECT_EQ( m_host.getElement( 4, 7 ),  1 );
+
+      EXPECT_EQ( m_host.getElement( 5, 0 ),  0 );
+      EXPECT_EQ( m_host.getElement( 5, 1 ), 16 );
+      EXPECT_EQ( m_host.getElement( 5, 2 ), 17 );
+      EXPECT_EQ( m_host.getElement( 5, 3 ), 18 );
+      EXPECT_EQ( m_host.getElement( 5, 4 ), 19 );
+      EXPECT_EQ( m_host.getElement( 5, 5 ), 20 );
+      EXPECT_EQ( m_host.getElement( 5, 6 ), 21 );
+      EXPECT_EQ( m_host.getElement( 5, 7 ),  1 );
+
+      EXPECT_EQ( m_host.getElement( 6, 0 ), 22 );
+      EXPECT_EQ( m_host.getElement( 6, 1 ), 23 );
+      EXPECT_EQ( m_host.getElement( 6, 2 ), 24 );
+      EXPECT_EQ( m_host.getElement( 6, 3 ), 25 );
+      EXPECT_EQ( m_host.getElement( 6, 4 ), 26 );
+      EXPECT_EQ( m_host.getElement( 6, 5 ), 27 );
+      EXPECT_EQ( m_host.getElement( 6, 6 ), 28 );
+      EXPECT_EQ( m_host.getElement( 6, 7 ),  1 );
+
+      EXPECT_EQ( m_host.getElement( 7, 0 ), 29 );
+      EXPECT_EQ( m_host.getElement( 7, 1 ), 30 );
+      EXPECT_EQ( m_host.getElement( 7, 2 ), 31 );
+      EXPECT_EQ( m_host.getElement( 7, 3 ), 32 );
+      EXPECT_EQ( m_host.getElement( 7, 4 ), 33 );
+      EXPECT_EQ( m_host.getElement( 7, 5 ), 34 );
+      EXPECT_EQ( m_host.getElement( 7, 6 ), 35 );
+      EXPECT_EQ( m_host.getElement( 7, 7 ), 36 );
+
+      // Try vectorProduct with copied cuda matrix to see if it works correctly.
+      using VectorType = TNL::Containers::Vector< RealType, TNL::Devices::Cuda, IndexType >;
+
+      VectorType inVector;
+      inVector.setSize( m_cols );
+      for( IndexType i = 0; i < inVector.getSize(); i++ )
+          inVector.setElement( i, 2 );
+
+      VectorType outVector;
+      outVector.setSize( m_rows );
+      for( IndexType j = 0; j < outVector.getSize(); j++ )
+          outVector.setElement( j, 0 );
+
+      m_cuda.vectorProduct( inVector, outVector );
+
+      EXPECT_EQ( outVector.getElement( 0 ),  32 );
+      EXPECT_EQ( outVector.getElement( 1 ),  28 );
+      EXPECT_EQ( outVector.getElement( 2 ),  56 );
+      EXPECT_EQ( outVector.getElement( 3 ), 102 );
+      EXPECT_EQ( outVector.getElement( 4 ),  32 );
+      EXPECT_EQ( outVector.getElement( 5 ), 224 );
+      EXPECT_EQ( outVector.getElement( 6 ), 352 );
+      EXPECT_EQ( outVector.getElement( 7 ), 520 );
    }
 }
 
@@ -1420,10 +1357,10 @@ void test_SaveAndLoad( const char* filename )
    /*
     * Sets up the following 4x4 sparse matrix:
     *
-    *    /  1  2  3  0 \
-    *    |  0  4  0  5 |
-    *    |  6  7  8  0 |
-    *    \  0  9 10 11 /
+    *    /  1  1  1  0 \
+    *    |  0  1  0  1 |
+    *    |  1  1  1  0 |
+    *    \  0  1  1  1 /
     */
 
     const IndexType m_rows = 4;
@@ -1484,24 +1421,24 @@ void test_SaveAndLoad( const char* filename )
     EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
 
     EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
-    EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
-    EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
+    EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  1 );
+    EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  1 );
     EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
 
     EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  0 );
-    EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  4 );
+    EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  1 );
     EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  0 );
-    EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  5 );
+    EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  1 );
 
-    EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  6 );
-    EXPECT_EQ( savedMatrix.getElement( 2, 1 ),  7 );
-    EXPECT_EQ( savedMatrix.getElement( 2, 2 ),  8 );
+    EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  1 );
+    EXPECT_EQ( savedMatrix.getElement( 2, 1 ),  1 );
+    EXPECT_EQ( savedMatrix.getElement( 2, 2 ),  1 );
     EXPECT_EQ( savedMatrix.getElement( 2, 3 ),  0 );
 
     EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
-    EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  9 );
-    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 );
-    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 );
+    EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  1 );
+    EXPECT_EQ( savedMatrix.getElement( 3, 2 ),  1 );
+    EXPECT_EQ( savedMatrix.getElement( 3, 3 ),  1 );
 
     EXPECT_EQ( std::remove( filename ), 0 );
 }
@@ -1516,11 +1453,11 @@ void test_Print()
 /*
  * Sets up the following 5x4 sparse matrix:
  *
- *    /  1  2  3  0 \
- *    |  0  0  0  4 |
- *    |  5  6  7  0 |
- *    |  0  8  9 10 |
- *    \  0  0 11 12 /
+ *    /  1  1  1  0 \
+ *    |  0  0  0  1 |
+ *    |  1  1  1  0 |
+ *    |  0  1  1  1 |
+ *    \  0  0  1  1 /
  */
 
     const IndexType m_rows = 5;
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp
index b0a9fcb00..b6b5a368f 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp
@@ -284,27 +284,6 @@ void test_GetRow()
     rowLengths.setElement( 9, 10 );
     m.setCompressedRowLengths( rowLengths );
 
-    /*RealType value = 1;
-    for( IndexType i = 0; i < 4; i++ )
-        m.setElement( 0, 2 * i, value++ );
-
-    for( IndexType i = 0; i < 3; i++ )
-        m.setElement( 1, i, value++ );
-
-    for( IndexType i = 0; i < 8; i++ )
-        m.setElement( 2, i, value++ );
-
-    for( IndexType i = 0; i < 2; i++ )
-        m.setElement( 3, i, value++ );
-
-    for( IndexType i = 4; i < 8; i++ )
-        m.setElement( i, 0, value++ );
-
-    for( IndexType j = 8; j < rows; j++)
-    {
-        for( IndexType i = 0; i < cols; i++ )
-            m.setElement( j, i, value++ );
-    }*/
     auto matrixView = m.getView();
     auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
        auto row = matrixView.getRow( rowIdx );
-- 
GitLab


From 91b5371c2f1491ee6914b08dcf378335a31bff21 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 4 Feb 2020 10:38:06 +0100
Subject: [PATCH 125/179] All binary sparse matrix unit tests pass well.

---
 src/TNL/Matrices/SparseMatrixView.hpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index 5b043753f..afc21788a 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -454,7 +454,10 @@ forRows( IndexType first, IndexType last, Function& function )
    const IndexType paddingIndex_ = this->getPaddingIndex();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable {
       if( isBinary() )
-         function( rowIdx, localIdx, columns_view[ globalIdx ], 1, compute );
+      {
+         RealType one( columns_view[ globalIdx ] != paddingIndex_ );
+         function( rowIdx, localIdx, columns_view[ globalIdx ], one, compute );
+      }
       else
          function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute );
    };
-- 
GitLab


From 7fd796ef33d91dd538392de1b1f2afe9dfe29a5d Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 4 Feb 2020 10:48:44 +0100
Subject: [PATCH 126/179] Deleted useless general unit tests of sparse
 matrices.

---
 src/UnitTests/Matrices/CMakeLists.txt       |  8 -----
 src/UnitTests/Matrices/SparseMatrixTest.cpp | 11 ------
 src/UnitTests/Matrices/SparseMatrixTest.cu  | 11 ------
 src/UnitTests/Matrices/SparseMatrixTest.h   | 39 ---------------------
 4 files changed, 69 deletions(-)
 delete mode 100644 src/UnitTests/Matrices/SparseMatrixTest.cpp
 delete mode 100644 src/UnitTests/Matrices/SparseMatrixTest.cu
 delete mode 100644 src/UnitTests/Matrices/SparseMatrixTest.h

diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index 4b12e81a3..d7259fc03 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -1,9 +1,6 @@
 ADD_SUBDIRECTORY( Legacy )
 
 IF( BUILD_CUDA )
-   CUDA_ADD_EXECUTABLE( SparseMatrixTest SparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
-
    CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
 
@@ -32,10 +29,6 @@ IF( BUILD_CUDA )
    TARGET_LINK_LIBRARIES( BinarySparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
 ELSE(  BUILD_CUDA )
-   ADD_EXECUTABLE( SparseMatrixTest SparseMatrixTest.cpp )
-   TARGET_COMPILE_OPTIONS( SparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
-
    ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp )
    TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
@@ -74,7 +67,6 @@ ELSE(  BUILD_CUDA )
 
 ENDIF( BUILD_CUDA )
 
-ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( TridiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.cpp b/src/UnitTests/Matrices/SparseMatrixTest.cpp
deleted file mode 100644
index 46f6b9bd3..000000000
--- a/src/UnitTests/Matrices/SparseMatrixTest.cpp
+++ /dev/null
@@ -1,11 +0,0 @@
-/***************************************************************************
-                          SparseMatrixTest.cpp -  description
-                             -------------------
-    begin                : Nov 2, 2018
-    copyright            : (C) 2018 by Tomas Oberhuber et al.
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include "SparseMatrixTest.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.cu b/src/UnitTests/Matrices/SparseMatrixTest.cu
deleted file mode 100644
index 01c23c193..000000000
--- a/src/UnitTests/Matrices/SparseMatrixTest.cu
+++ /dev/null
@@ -1,11 +0,0 @@
-/***************************************************************************
-                          SparseMatrixTest.cu -  description
-                             -------------------
-    begin                : Nov 2, 2018
-    copyright            : (C) 2018 by Tomas Oberhuber et al.
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include "SparseMatrixTest.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h
deleted file mode 100644
index b08d66c33..000000000
--- a/src/UnitTests/Matrices/SparseMatrixTest.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/***************************************************************************
-                          SparseMatrixTest.h -  description
-                             -------------------
-    begin                : Nov 2, 2018
-    copyright            : (C) 2018 by Tomas Oberhuber et al.
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Matrices/Legacy/CSR.h>
-
-#include "SparseMatrixTest.hpp"
-#include <iostream>
-
-#ifdef HAVE_GTEST 
-#include <gtest/gtest.h>
-
-using CSR_host_float = TNL::Matrices::CSR< float, TNL::Devices::Host, int >;
-using CSR_host_int = TNL::Matrices::CSR< int, TNL::Devices::Host, int >;
-
-using CSR_cuda_float = TNL::Matrices::CSR< float, TNL::Devices::Cuda, int >;
-using CSR_cuda_int = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >;
-
-TEST( SparseMatrixTest, CSR_perforSORIterationTest_Host )
-{
-    //test_PerformSORIteration< CSR_host_float >();
-}
-
-#ifdef HAVE_CUDA
-TEST( SparseMatrixTest, CSR_perforSORIterationTest_Cuda )
-{
-   //    test_PerformSORIteration< CSR_cuda_float >();
-}
-#endif
-
-#endif
-
-#include "../main.h"
-- 
GitLab


From 866062ceea3a81771c12f2a8fdb6c2c2194a6f9e Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 4 Feb 2020 12:12:58 +0100
Subject: [PATCH 127/179] Renaming SparseMatrixTest.hpp to SparseMatrixTest.h.

---
 .../Matrices/{SparseMatrixTest.hpp => SparseMatrixTest.h}       | 0
 src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h          | 2 +-
 src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h      | 2 +-
 .../Matrices/SparseMatrixTest_SlicedEllpack_segments.h          | 2 +-
 4 files changed, 3 insertions(+), 3 deletions(-)
 rename src/UnitTests/Matrices/{SparseMatrixTest.hpp => SparseMatrixTest.h} (100%)

diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.h
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest.hpp
rename to src/UnitTests/Matrices/SparseMatrixTest.h
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
index 5ac3dde26..1e89d544a 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
@@ -12,7 +12,7 @@
 #include <TNL/Matrices/SparseMatrix.h>
 
 
-#include "SparseMatrixTest.hpp"
+#include "SparseMatrixTest.h"
 #include <iostream>
 
 #ifdef HAVE_GTEST
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
index 48cf9afbf..32678c2b0 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
@@ -12,7 +12,7 @@
 #include <TNL/Matrices/SparseMatrix.h>
 
 
-#include "SparseMatrixTest.hpp"
+#include "SparseMatrixTest.h"
 #include <iostream>
 
 #ifdef HAVE_GTEST
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
index de5356f3a..11365cc5b 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
@@ -13,7 +13,7 @@
 #include <TNL/Matrices/MatrixType.h>
 
 
-#include "SparseMatrixTest.hpp"
+#include "SparseMatrixTest.h"
 #include <iostream>
 
 #ifdef HAVE_GTEST
-- 
GitLab


From c5f2624b75c33d649173edac3c34007bcfa7cd84 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 4 Feb 2020 12:36:01 +0100
Subject: [PATCH 128/179] Renaming sparse matrix unit tests.

---
 src/UnitTests/Matrices/CMakeLists.txt         | 50 +++++++++----------
 .../Matrices/SparseMatrixTest_CSR.cpp         |  1 +
 ...R_segments.cpp => SparseMatrixTest_CSR.cu} |  0
 ..._CSR_segments.h => SparseMatrixTest_CSR.h} |  0
 .../Matrices/SparseMatrixTest_CSR_segments.cu |  1 -
 .../Matrices/SparseMatrixTest_Ellpack.cpp     |  1 +
 ...gments.cpp => SparseMatrixTest_Ellpack.cu} |  0
 ..._segments.h => SparseMatrixTest_Ellpack.h} |  0
 .../SparseMatrixTest_Ellpack_segments.cu      |  1 -
 .../SparseMatrixTest_SlicedEllpack.cpp        |  1 +
 ....cpp => SparseMatrixTest_SlicedEllpack.cu} |  0
 ...nts.h => SparseMatrixTest_SlicedEllpack.h} |  0
 ...SparseMatrixTest_SlicedEllpack_segments.cu |  1 -
 13 files changed, 28 insertions(+), 28 deletions(-)
 create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp
 rename src/UnitTests/Matrices/{SparseMatrixTest_CSR_segments.cpp => SparseMatrixTest_CSR.cu} (100%)
 rename src/UnitTests/Matrices/{SparseMatrixTest_CSR_segments.h => SparseMatrixTest_CSR.h} (100%)
 delete mode 100644 src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cu
 create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp
 rename src/UnitTests/Matrices/{SparseMatrixTest_Ellpack_segments.cpp => SparseMatrixTest_Ellpack.cu} (100%)
 rename src/UnitTests/Matrices/{SparseMatrixTest_Ellpack_segments.h => SparseMatrixTest_Ellpack.h} (100%)
 delete mode 100644 src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu
 create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp
 rename src/UnitTests/Matrices/{SparseMatrixTest_SlicedEllpack_segments.cpp => SparseMatrixTest_SlicedEllpack.cu} (100%)
 rename src/UnitTests/Matrices/{SparseMatrixTest_SlicedEllpack_segments.h => SparseMatrixTest_SlicedEllpack.h} (100%)
 delete mode 100644 src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cu

diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index d7259fc03..c4b2fabd3 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -1,9 +1,6 @@
 ADD_SUBDIRECTORY( Legacy )
 
 IF( BUILD_CUDA )
-   CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
-
    CUDA_ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
@@ -16,23 +13,22 @@ IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
-   CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} )
+   CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR SparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} )
 
-   CUDA_ADD_EXECUTABLE( SparseMatrixTest_Ellpack_segments SparseMatrixTest_Ellpack_segments.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack_segments ${GTEST_BOTH_LIBRARIES} )
+   CUDA_ADD_EXECUTABLE( SparseMatrixTest_Ellpack SparseMatrixTest_Ellpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} )
 
-   CUDA_ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack_segments SparseMatrixTest_SlicedEllpack_segments.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack_segments ${GTEST_BOTH_LIBRARIES} )
+   CUDA_ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
 
    CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest BinarySparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( BinarySparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
 ELSE(  BUILD_CUDA )
-   ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp )
-   TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
-
    ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp )
    TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} )
@@ -49,17 +45,21 @@ ELSE(  BUILD_CUDA )
    TARGET_COMPILE_OPTIONS( MultidiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
-   ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cpp )
-   TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR_segments PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} )
+   ADD_EXECUTABLE( SparseMatrixTest_CSR SparseMatrixTest_CSR.cpp )
+   TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( SparseMatrixTest_Ellpack SparseMatrixTest_Ellpack.cpp )
+   TARGET_COMPILE_OPTIONS( SparseMatrixTest_Ellpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} )
 
-   ADD_EXECUTABLE( SparseMatrixTest_Ellpack_segments SparseMatrixTest_Ellpack_segments.cpp )
-   TARGET_COMPILE_OPTIONS( SparseMatrixTest_Ellpack_segments PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack_segments ${GTEST_BOTH_LIBRARIES} )
+   ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cpp )
+   TARGET_COMPILE_OPTIONS( SparseMatrixTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
 
-   ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack_segments SparseMatrixTest_SlicedEllpack_segments.cpp )
-   TARGET_COMPILE_OPTIONS( SparseMatrixTest_SlicedEllpack_segments PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack_segments ${GTEST_BOTH_LIBRARIES} )
+   ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp )
+   TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
 
    ADD_EXECUTABLE( BinarySparseMatrixTest BinarySparseMatrixTest.cpp )
    TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
@@ -72,9 +72,9 @@ ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECU
 ADD_TEST( TridiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 
-ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( SparseMatrixTest_Ellpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( SparseMatrixTest_SlicedEllpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack_segments${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( BinarySparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 
 if( ${BUILD_MPI} )
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp
new file mode 100644
index 000000000..258ad2c53
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp
@@ -0,0 +1 @@
+#include "SparseMatrixTest_CSR.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cpp b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cpp
rename to src/UnitTests/Matrices/SparseMatrixTest_CSR.cu
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h
rename to src/UnitTests/Matrices/SparseMatrixTest_CSR.h
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cu b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cu
deleted file mode 100644
index 771c74b9a..000000000
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cu
+++ /dev/null
@@ -1 +0,0 @@
-#include "SparseMatrixTest_CSR_segments.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp
new file mode 100644
index 000000000..c454706f0
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp
@@ -0,0 +1 @@
+#include "SparseMatrixTest_Ellpack.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cpp b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cpp
rename to src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h
rename to src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu
deleted file mode 100644
index 63219e9b0..000000000
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu
+++ /dev/null
@@ -1 +0,0 @@
-#include "SparseMatrixTest_Ellpack_segments.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp
new file mode 100644
index 000000000..40e2e94b8
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp
@@ -0,0 +1 @@
+#include "SparseMatrixTest_SlicedEllpack.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cpp b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cpp
rename to src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h
rename to src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cu b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cu
deleted file mode 100644
index a88301100..000000000
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cu
+++ /dev/null
@@ -1 +0,0 @@
-#include "SparseMatrixTest_SlicedEllpack_segments.h"
-- 
GitLab


From 4126b7c320369cafc354c02df7c9091de697077a Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 4 Feb 2020 12:48:57 +0100
Subject: [PATCH 129/179] Refactoring binary sparse matrix unit tests.

---
 .../Matrices/BinarySparseMatrixTest.hpp       | 1395 +++++++----------
 1 file changed, 551 insertions(+), 844 deletions(-)

diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
index 3d1775972..8b3d8f833 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
@@ -13,11 +13,7 @@
 #include <TNL/Math.h>
 #include <TNL/Algorithms/ParallelFor.h>
 #include <iostream>
-
-// Temporary, until test_OperatorEquals doesn't work for all formats.
-#include <TNL/Matrices/Legacy/ChunkedEllpack.h>
-#include <TNL/Matrices/Legacy/AdEllpack.h>
-#include <TNL/Matrices/Legacy/BiEllpack.h>
+#include <sstream>
 
 #ifdef HAVE_GTEST
 #include <gtest/gtest.h>
@@ -25,92 +21,93 @@
 template< typename MatrixHostFloat, typename MatrixHostInt >
 void host_test_GetType()
 {
-    bool testRan = false;
-    EXPECT_TRUE( testRan );
-    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
-    std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+   bool testRan = false;
+   EXPECT_TRUE( testRan );
+   std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+   std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
 }
 
 template< typename MatrixCudaFloat, typename MatrixCudaInt >
 void cuda_test_GetType()
 {
-    bool testRan = false;
-    EXPECT_TRUE( testRan );
-    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
-    std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+   bool testRan = false;
+   EXPECT_TRUE( testRan );
+   std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+   std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
 }
 
 template< typename Matrix >
 void test_SetDimensions()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    const IndexType rows = 9;
-    const IndexType cols = 8;
+   const IndexType rows = 9;
+   const IndexType cols = 8;
 
-    Matrix m;
-    m.setDimensions( rows, cols );
+   Matrix m;
+   m.setDimensions( rows, cols );
+
+   EXPECT_EQ( m.getRows(), 9 );
+   EXPECT_EQ( m.getColumns(), 8 );
 
-    EXPECT_EQ( m.getRows(), 9 );
-    EXPECT_EQ( m.getColumns(), 8 );
+   Matrix m2( rows, cols );
+   EXPECT_EQ( m2.getRows(), 9 );
+   EXPECT_EQ( m2.getColumns(), 8 );
 }
 
 template< typename Matrix >
 void test_SetCompressedRowLengths()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-
-    const IndexType rows = 10;
-    const IndexType cols = 11;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
-    typename Matrix::CompressedRowLengthsVector rowLengths;
-    rowLengths.setSize( rows );
-    rowLengths.setValue( 3 );
+   const IndexType rows = 10;
+   const IndexType cols = 11;
 
-    IndexType rowLength = 1;
-    for( IndexType i = 2; i < rows; i++ )
-        rowLengths.setElement( i, rowLength++ );
+   Matrix m( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setValue( 3 );
 
-    m.setCompressedRowLengths( rowLengths );
+   IndexType rowLength = 1;
+   for( IndexType i = 2; i < rows; i++ )
+      rowLengths.setElement( i, rowLength++ );
 
-    // Insert values into the rows.
+   m.setCompressedRowLengths( rowLengths );
 
-    for( IndexType i = 0; i < 3; i++ )      // 0th row
-        m.setElement( 0, i, 1 );
+   // Insert values into the rows.
+   for( IndexType i = 0; i < 3; i++ )      // 0th row
+      m.setElement( 0, i, 1 );
 
-    for( IndexType i = 0; i < 3; i++ )      // 1st row
-        m.setElement( 1, i, 1 );
+   for( IndexType i = 0; i < 3; i++ )      // 1st row
+      m.setElement( 1, i, 1 );
 
-    for( IndexType i = 0; i < 1; i++ )      // 2nd row
-        m.setElement( 2, i, 1 );
+   for( IndexType i = 0; i < 1; i++ )      // 2nd row
+      m.setElement( 2, i, 1 );
 
-    for( IndexType i = 0; i < 2; i++ )      // 3rd row
-        m.setElement( 3, i, 1 );
+   for( IndexType i = 0; i < 2; i++ )      // 3rd row
+      m.setElement( 3, i, 1 );
 
-    for( IndexType i = 0; i < 3; i++ )      // 4th row
-        m.setElement( 4, i, 1 );
+   for( IndexType i = 0; i < 3; i++ )      // 4th row
+      m.setElement( 4, i, 1 );
 
-    for( IndexType i = 0; i < 4; i++ )      // 5th row
-        m.setElement( 5, i, 1 );
+   for( IndexType i = 0; i < 4; i++ )      // 5th row
+      m.setElement( 5, i, 1 );
 
-    for( IndexType i = 0; i < 5; i++ )      // 6th row
-        m.setElement( 6, i, 1 );
+   for( IndexType i = 0; i < 5; i++ )      // 6th row
+      m.setElement( 6, i, 1 );
 
-    for( IndexType i = 0; i < 6; i++ )      // 7th row
-        m.setElement( 7, i, 1 );
+   for( IndexType i = 0; i < 6; i++ )      // 7th row
+      m.setElement( 7, i, 1 );
 
-    for( IndexType i = 0; i < 7; i++ )      // 8th row
-        m.setElement( 8, i, 1 );
+   for( IndexType i = 0; i < 7; i++ )      // 8th row
+      m.setElement( 8, i, 1 );
 
-    for( IndexType i = 0; i < 8; i++ )      // 9th row
-        m.setElement( 9, i, 1 );
+   for( IndexType i = 0; i < 8; i++ )      // 9th row
+      m.setElement( 9, i, 1 );
 
    rowLengths = 0;
    m.getCompressedRowLengths( rowLengths );
@@ -121,26 +118,20 @@ void test_SetCompressedRowLengths()
 template< typename Matrix1, typename Matrix2 >
 void test_SetLike()
 {
-    using RealType = typename Matrix1::RealType;
-    using DeviceType = typename Matrix1::DeviceType;
-    using IndexType = typename Matrix1::IndexType;
-
-    const IndexType rows = 8;
-    const IndexType cols = 7;
-
-    Matrix1 m1;
-    m1.reset();
-    m1.setDimensions( rows + 1, cols + 2 );
+   using RealType = typename Matrix1::RealType;
+   using DeviceType = typename Matrix1::DeviceType;
+   using IndexType = typename Matrix1::IndexType;
 
-    Matrix2 m2;
-    m2.reset();
-    m2.setDimensions( rows, cols );
+   const IndexType rows = 8;
+   const IndexType cols = 7;
 
-    m1.setLike( m2 );
+   Matrix1 m1( rows + 1, cols + 2 );
+   Matrix2 m2( rows, cols );
 
+   m1.setLike( m2 );
 
-    EXPECT_EQ( m1.getRows(), m2.getRows() );
-    EXPECT_EQ( m1.getColumns(), m2.getColumns() );
+   EXPECT_EQ( m1.getRows(), m2.getRows() );
+   EXPECT_EQ( m1.getColumns(), m2.getColumns() );
 }
 
 template< typename Matrix >
@@ -168,10 +159,7 @@ void test_GetNumberOfNonzeroMatrixElements()
    const IndexType rows = 10;
    const IndexType cols = 10;
 
-   Matrix m;
-   m.reset();
-
-   m.setDimensions( rows, cols );
+   Matrix m( rows, cols );
 
    typename Matrix::CompressedRowLengthsVector rowLengths;
    rowLengths.setSize( rows );
@@ -180,9 +168,8 @@ void test_GetNumberOfNonzeroMatrixElements()
    rowLengths.setElement( 2, 1 );
    rowLengths.setElement( 3, 1 );
    for( IndexType i = 4; i < rows - 2; i++ )
-   {
       rowLengths.setElement( i, 1 );
-   }
+
    rowLengths.setElement( 8, 1 );
    rowLengths.setElement( 9, 1 );
    m.setCompressedRowLengths( rowLengths );
@@ -203,10 +190,8 @@ void test_GetNumberOfNonzeroMatrixElements()
       m.setElement( i, 0, 1 );
 
    for( IndexType j = 8; j < rows; j++)
-   {
       for( IndexType i = 0; i < cols; i++ )
          m.setElement( j, i, 1 );
-   }
 
    EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 );
 }
@@ -214,405 +199,395 @@ void test_GetNumberOfNonzeroMatrixElements()
 template< typename Matrix >
 void test_Reset()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-
-/*
- * Sets up the following 5x4 sparse matrix:
- *
- *    /  0  0  0  0 \
- *    |  0  0  0  0 |
- *    |  0  0  0  0 |
- *    |  0  0  0  0 |
- *    \  0  0  0  0 /
- */
-
-    const IndexType rows = 5;
-    const IndexType cols = 4;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    Matrix m;
-    m.setDimensions( rows, cols );
+   /*
+    * Sets up the following 5x4 sparse matrix:
+    *
+    *    /  0  0  0  0 \
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    \  0  0  0  0 /
+    */
 
-    m.reset();
+   const IndexType rows = 5;
+   const IndexType cols = 4;
 
+   Matrix m( rows, cols );
+   m.reset();
 
-    EXPECT_EQ( m.getRows(), 0 );
-    EXPECT_EQ( m.getColumns(), 0 );
+   EXPECT_EQ( m.getRows(), 0 );
+   EXPECT_EQ( m.getColumns(), 0 );
 }
 
 template< typename Matrix >
 void test_GetRow()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-
-/*
- * Sets up the following 10x10 sparse matrix:
- *
- *    /  1  0  1  0  1  0  1  0  0  0  \
- *    |  1  1  1  0  0  0  0  0  0  0  |
- *    |  1  1  1  1  1  1  1  1  0  0  |
- *    |  1  1  0  0  0  0  0  0  0  0  |
- *    |  1  0  0  0  0  0  0  0  0  0  |
- *    |  1  0  0  0  0  0  0  0  0  0  |
- *    |  1  0  0  0  0  0  0  0  0  0  |
- *    |  1  0  0  0  0  0  0  0  0  0  |
- *    |  1  1  1  1  1  1  1  1  1  1  |
- *    \  1  1  1  1  1  1  1  1  1  1 /
- */
-
-    const IndexType rows = 10;
-    const IndexType cols = 10;
-
-    Matrix m( rows, cols );
-
-    typename Matrix::CompressedRowLengthsVector rowLengths;
-    rowLengths.setSize( rows );
-    rowLengths.setElement( 0, 4 );
-    rowLengths.setElement( 1, 3 );
-    rowLengths.setElement( 2, 8 );
-    rowLengths.setElement( 3, 2 );
-    for( IndexType i = 4; i < rows - 2; i++ )
-    {
-        rowLengths.setElement( i, 1 );
-    }
-    rowLengths.setElement( 8, 10 );
-    rowLengths.setElement( 9, 10 );
-    m.setCompressedRowLengths( rowLengths );
-
-    auto matrixView = m.getView();
-    auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
-       auto row = matrixView.getRow( rowIdx );
-       switch( rowIdx )
-       {
-          case 0:
-            for( IndexType i = 0; i < 4; i++ )
-               row.setElement( i, 2 * i, 1 );
-            break;
-         case 1:
-            for( IndexType i = 0; i < 3; i++ )
-               row.setElement( i, i, 1 );
-            break;
-         case 2:
-            for( IndexType i = 0; i < 8; i++ )
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  1  0  1  0  1  0  0  0  \
+    *    |  1  1  1  0  0  0  0  0  0  0  |
+    *    |  1  1  1  1  1  1  1  1  0  0  |
+    *    |  1  1  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  1  1  1  1  1  1  1  1  1  |
+    *    \  1  1  1  1  1  1  1  1  1  1 /
+    */
+
+   const IndexType rows = 10;
+   const IndexType cols = 10;
+
+   Matrix m( rows, cols );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setElement( 0, 4 );
+   rowLengths.setElement( 1, 3 );
+   rowLengths.setElement( 2, 8 );
+   rowLengths.setElement( 3, 2 );
+   for( IndexType i = 4; i < rows - 2; i++ )
+       rowLengths.setElement( i, 1 );
+
+   rowLengths.setElement( 8, 10 );
+   rowLengths.setElement( 9, 10 );
+   m.setCompressedRowLengths( rowLengths );
+
+   auto matrixView = m.getView();
+   auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
+      auto row = matrixView.getRow( rowIdx );
+      switch( rowIdx )
+      {
+         case 0:
+           for( IndexType i = 0; i < 4; i++ )
+              row.setElement( i, 2 * i, 1 );
+           break;
+        case 1:
+           for( IndexType i = 0; i < 3; i++ )
+              row.setElement( i, i, 1 );
+           break;
+        case 2:
+           for( IndexType i = 0; i < 8; i++ )
+              row.setElement( i, i, 1 );
+           break;
+        case 3:
+           for( IndexType i = 0; i < 2; i++ )
+              row.setElement( i, i, 1 );
+           break;
+        case 4:
+           row.setElement( 0, 0, 1 );
+           break;
+        case 5:
+           row.setElement( 0, 0, 1 );
+           break;
+        case 6:
+           row.setElement( 0, 0, 1 );
+           break;
+        case 7:
+           row.setElement( 0, 0, 1 );
+           break;
+        case 8:
+            for( IndexType i = 0; i < rows; i++ )
                row.setElement( i, i, 1 );
             break;
-         case 3:
-            for( IndexType i = 0; i < 2; i++ )
+        case 9:
+            for( IndexType i = 0; i < rows; i++ )
                row.setElement( i, i, 1 );
             break;
-         case 4:
-            row.setElement( 0, 0, 1 );
-            break;
-         case 5:
-            row.setElement( 0, 0, 1 );
-            break;
-         case 6:
-            row.setElement( 0, 0, 1 );
-            break;
-         case 7:
-            row.setElement( 0, 0, 1 );
-            break;
-         case 8:
-             for( IndexType i = 0; i < rows; i++ )
-                row.setElement( i, i, 1 );
-             break;
-         case 9:
-             for( IndexType i = 0; i < rows; i++ )
-                row.setElement( i, i, 1 );
-             break;
-       }
-    };
-    TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f );
-
-    EXPECT_EQ( m.getElement( 0, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 0, 1 ), 0 );
-    EXPECT_EQ( m.getElement( 0, 2 ), 1 );
-    EXPECT_EQ( m.getElement( 0, 3 ), 0 );
-    EXPECT_EQ( m.getElement( 0, 4 ), 1 );
-    EXPECT_EQ( m.getElement( 0, 5 ), 0 );
-    EXPECT_EQ( m.getElement( 0, 6 ), 1 );
-    EXPECT_EQ( m.getElement( 0, 7 ), 0 );
-    EXPECT_EQ( m.getElement( 0, 8 ), 0 );
-    EXPECT_EQ( m.getElement( 0, 9 ), 0 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 1, 1 ), 1 );
-    EXPECT_EQ( m.getElement( 1, 2 ), 1 );
-    EXPECT_EQ( m.getElement( 1, 3 ), 0 );
-    EXPECT_EQ( m.getElement( 1, 4 ), 0 );
-    EXPECT_EQ( m.getElement( 1, 5 ), 0 );
-    EXPECT_EQ( m.getElement( 1, 6 ), 0 );
-    EXPECT_EQ( m.getElement( 1, 7 ), 0 );
-    EXPECT_EQ( m.getElement( 1, 8 ), 0 );
-    EXPECT_EQ( m.getElement( 1, 9 ), 0 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 2, 1 ), 1 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 1 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 1 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 1 );
-    EXPECT_EQ( m.getElement( 2, 5 ), 1 );
-    EXPECT_EQ( m.getElement( 2, 6 ), 1 );
-    EXPECT_EQ( m.getElement( 2, 7 ), 1 );
-    EXPECT_EQ( m.getElement( 2, 8 ), 0 );
-    EXPECT_EQ( m.getElement( 2, 9 ), 0 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 3, 1 ), 1 );
-    EXPECT_EQ( m.getElement( 3, 2 ), 0 );
-    EXPECT_EQ( m.getElement( 3, 3 ), 0 );
-    EXPECT_EQ( m.getElement( 3, 4 ), 0 );
-    EXPECT_EQ( m.getElement( 3, 5 ), 0 );
-    EXPECT_EQ( m.getElement( 3, 6 ), 0 );
-    EXPECT_EQ( m.getElement( 3, 7 ), 0 );
-    EXPECT_EQ( m.getElement( 3, 8 ), 0 );
-    EXPECT_EQ( m.getElement( 3, 9 ), 0 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 0 );
-    EXPECT_EQ( m.getElement( 4, 2 ), 0 );
-    EXPECT_EQ( m.getElement( 4, 3 ), 0 );
-    EXPECT_EQ( m.getElement( 4, 4 ), 0 );
-    EXPECT_EQ( m.getElement( 4, 5 ), 0 );
-    EXPECT_EQ( m.getElement( 4, 6 ), 0 );
-    EXPECT_EQ( m.getElement( 4, 7 ), 0 );
-    EXPECT_EQ( m.getElement( 4, 8 ), 0 );
-    EXPECT_EQ( m.getElement( 4, 9 ), 0 );
-
-    EXPECT_EQ( m.getElement( 5, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 5, 1 ), 0 );
-    EXPECT_EQ( m.getElement( 5, 2 ), 0 );
-    EXPECT_EQ( m.getElement( 5, 3 ), 0 );
-    EXPECT_EQ( m.getElement( 5, 4 ), 0 );
-    EXPECT_EQ( m.getElement( 5, 5 ), 0 );
-    EXPECT_EQ( m.getElement( 5, 6 ), 0 );
-    EXPECT_EQ( m.getElement( 5, 7 ), 0 );
-    EXPECT_EQ( m.getElement( 5, 8 ), 0 );
-    EXPECT_EQ( m.getElement( 5, 9 ), 0 );
-
-    EXPECT_EQ( m.getElement( 6, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 6, 1 ), 0 );
-    EXPECT_EQ( m.getElement( 6, 2 ), 0 );
-    EXPECT_EQ( m.getElement( 6, 3 ), 0 );
-    EXPECT_EQ( m.getElement( 6, 4 ), 0 );
-    EXPECT_EQ( m.getElement( 6, 5 ), 0 );
-    EXPECT_EQ( m.getElement( 6, 6 ), 0 );
-    EXPECT_EQ( m.getElement( 6, 7 ), 0 );
-    EXPECT_EQ( m.getElement( 6, 8 ), 0 );
-    EXPECT_EQ( m.getElement( 6, 9 ), 0 );
-
-    EXPECT_EQ( m.getElement( 7, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 7, 1 ), 0 );
-    EXPECT_EQ( m.getElement( 7, 2 ), 0 );
-    EXPECT_EQ( m.getElement( 7, 3 ), 0 );
-    EXPECT_EQ( m.getElement( 7, 4 ), 0 );
-    EXPECT_EQ( m.getElement( 7, 5 ), 0 );
-    EXPECT_EQ( m.getElement( 7, 6 ), 0 );
-    EXPECT_EQ( m.getElement( 7, 7 ), 0 );
-    EXPECT_EQ( m.getElement( 7, 8 ), 0 );
-    EXPECT_EQ( m.getElement( 7, 9 ), 0 );
-
-    EXPECT_EQ( m.getElement( 8, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 8, 1 ), 1 );
-    EXPECT_EQ( m.getElement( 8, 2 ), 1 );
-    EXPECT_EQ( m.getElement( 8, 3 ), 1 );
-    EXPECT_EQ( m.getElement( 8, 4 ), 1 );
-    EXPECT_EQ( m.getElement( 8, 5 ), 1 );
-    EXPECT_EQ( m.getElement( 8, 6 ), 1 );
-    EXPECT_EQ( m.getElement( 8, 7 ), 1 );
-    EXPECT_EQ( m.getElement( 8, 8 ), 1 );
-    EXPECT_EQ( m.getElement( 8, 9 ), 1 );
-
-    EXPECT_EQ( m.getElement( 9, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 9, 1 ), 1 );
-    EXPECT_EQ( m.getElement( 9, 2 ), 1 );
-    EXPECT_EQ( m.getElement( 9, 3 ), 1 );
-    EXPECT_EQ( m.getElement( 9, 4 ), 1 );
-    EXPECT_EQ( m.getElement( 9, 5 ), 1 );
-    EXPECT_EQ( m.getElement( 9, 6 ), 1 );
-    EXPECT_EQ( m.getElement( 9, 7 ), 1 );
-    EXPECT_EQ( m.getElement( 9, 8 ), 1 );
-    EXPECT_EQ( m.getElement( 9, 9 ), 1 );
+      }
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f );
+
+   EXPECT_EQ( m.getElement( 0, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 6 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 7 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 2, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 6, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 7, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 7, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 8, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 5 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 6 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 7 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 8 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 9 ), 1 );
+
+   EXPECT_EQ( m.getElement( 9, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 5 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 6 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 7 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 8 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 9 ), 1 );
 }
 
 
 template< typename Matrix >
 void test_SetElement()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-
-/*
- * Sets up the following 10x10 sparse matrix:
- *
- *    /  1  0  1  0  1  0  1  0  0  0  \
- *    |  1  1  1  0  0  0  0  0  0  0  |
- *    |  1  1  1  1  1  1  1  1  0  0  |
- *    |  1  1  0  0  0  0  0  0  0  0  |
- *    |  1  0  0  0  0  0  0  0  0  0  |
- *    |  1  0  0  0  0  0  0  0  0  0  |
- *    |  1  0  0  0  0  0  0  0  0  0  |
- *    |  1  0  0  0  0  0  0  0  0  0  |
- *    |  1  1  1  1  1  1  1  1  1  1  |
- *    \  1  1  1  1  1  1  1  1  1  1 /
- */
-
-    const IndexType rows = 10;
-    const IndexType cols = 10;
-
-    Matrix m;
-    m.reset();
-
-    m.setDimensions( rows, cols );
-
-    typename Matrix::CompressedRowLengthsVector rowLengths;
-    rowLengths.setSize( rows );
-    rowLengths.setElement( 0, 4 );
-    rowLengths.setElement( 1, 3 );
-    rowLengths.setElement( 2, 8 );
-    rowLengths.setElement( 3, 2 );
-    for( IndexType i = 4; i < rows - 2; i++ )
-    {
-        rowLengths.setElement( i, 1 );
-    }
-    rowLengths.setElement( 8, 10 );
-    rowLengths.setElement( 9, 10 );
-    m.setCompressedRowLengths( rowLengths );
-
-    for( IndexType i = 0; i < 4; i++ )
-        m.setElement( 0, 2 * i, 1 );
-
-    for( IndexType i = 0; i < 3; i++ )
-        m.setElement( 1, i, 1 );
-
-    for( IndexType i = 0; i < 8; i++ )
-        m.setElement( 2, i, 1 );
-
-    for( IndexType i = 0; i < 2; i++ )
-        m.setElement( 3, i, 1 );
-
-    for( IndexType i = 4; i < 8; i++ )
-        m.setElement( i, 0, 1 );
-
-    for( IndexType j = 8; j < rows; j++)
-    {
-        for( IndexType i = 0; i < cols; i++ )
-            m.setElement( j, i, 1 );
-    }
-
-    EXPECT_EQ( m.getElement( 0, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 0, 1 ), 0 );
-    EXPECT_EQ( m.getElement( 0, 2 ), 1 );
-    EXPECT_EQ( m.getElement( 0, 3 ), 0 );
-    EXPECT_EQ( m.getElement( 0, 4 ), 1 );
-    EXPECT_EQ( m.getElement( 0, 5 ), 0 );
-    EXPECT_EQ( m.getElement( 0, 6 ), 1 );
-    EXPECT_EQ( m.getElement( 0, 7 ), 0 );
-    EXPECT_EQ( m.getElement( 0, 8 ), 0 );
-    EXPECT_EQ( m.getElement( 0, 9 ), 0 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 1, 1 ), 1 );
-    EXPECT_EQ( m.getElement( 1, 2 ), 1 );
-    EXPECT_EQ( m.getElement( 1, 3 ), 0 );
-    EXPECT_EQ( m.getElement( 1, 4 ), 0 );
-    EXPECT_EQ( m.getElement( 1, 5 ), 0 );
-    EXPECT_EQ( m.getElement( 1, 6 ), 0 );
-    EXPECT_EQ( m.getElement( 1, 7 ), 0 );
-    EXPECT_EQ( m.getElement( 1, 8 ), 0 );
-    EXPECT_EQ( m.getElement( 1, 9 ), 0 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 2, 1 ), 1 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 1 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 1 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 1 );
-    EXPECT_EQ( m.getElement( 2, 5 ), 1 );
-    EXPECT_EQ( m.getElement( 2, 6 ), 1 );
-    EXPECT_EQ( m.getElement( 2, 7 ), 1 );
-    EXPECT_EQ( m.getElement( 2, 8 ), 0 );
-    EXPECT_EQ( m.getElement( 2, 9 ), 0 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 3, 1 ), 1 );
-    EXPECT_EQ( m.getElement( 3, 2 ), 0 );
-    EXPECT_EQ( m.getElement( 3, 3 ), 0 );
-    EXPECT_EQ( m.getElement( 3, 4 ), 0 );
-    EXPECT_EQ( m.getElement( 3, 5 ), 0 );
-    EXPECT_EQ( m.getElement( 3, 6 ), 0 );
-    EXPECT_EQ( m.getElement( 3, 7 ), 0 );
-    EXPECT_EQ( m.getElement( 3, 8 ), 0 );
-    EXPECT_EQ( m.getElement( 3, 9 ), 0 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 0 );
-    EXPECT_EQ( m.getElement( 4, 2 ), 0 );
-    EXPECT_EQ( m.getElement( 4, 3 ), 0 );
-    EXPECT_EQ( m.getElement( 4, 4 ), 0 );
-    EXPECT_EQ( m.getElement( 4, 5 ), 0 );
-    EXPECT_EQ( m.getElement( 4, 6 ), 0 );
-    EXPECT_EQ( m.getElement( 4, 7 ), 0 );
-    EXPECT_EQ( m.getElement( 4, 8 ), 0 );
-    EXPECT_EQ( m.getElement( 4, 9 ), 0 );
-
-    EXPECT_EQ( m.getElement( 5, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 5, 1 ), 0 );
-    EXPECT_EQ( m.getElement( 5, 2 ), 0 );
-    EXPECT_EQ( m.getElement( 5, 3 ), 0 );
-    EXPECT_EQ( m.getElement( 5, 4 ), 0 );
-    EXPECT_EQ( m.getElement( 5, 5 ), 0 );
-    EXPECT_EQ( m.getElement( 5, 6 ), 0 );
-    EXPECT_EQ( m.getElement( 5, 7 ), 0 );
-    EXPECT_EQ( m.getElement( 5, 8 ), 0 );
-    EXPECT_EQ( m.getElement( 5, 9 ), 0 );
-
-    EXPECT_EQ( m.getElement( 6, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 6, 1 ), 0 );
-    EXPECT_EQ( m.getElement( 6, 2 ), 0 );
-    EXPECT_EQ( m.getElement( 6, 3 ), 0 );
-    EXPECT_EQ( m.getElement( 6, 4 ), 0 );
-    EXPECT_EQ( m.getElement( 6, 5 ), 0 );
-    EXPECT_EQ( m.getElement( 6, 6 ), 0 );
-    EXPECT_EQ( m.getElement( 6, 7 ), 0 );
-    EXPECT_EQ( m.getElement( 6, 8 ), 0 );
-    EXPECT_EQ( m.getElement( 6, 9 ), 0 );
-
-    EXPECT_EQ( m.getElement( 7, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 7, 1 ), 0 );
-    EXPECT_EQ( m.getElement( 7, 2 ), 0 );
-    EXPECT_EQ( m.getElement( 7, 3 ), 0 );
-    EXPECT_EQ( m.getElement( 7, 4 ), 0 );
-    EXPECT_EQ( m.getElement( 7, 5 ), 0 );
-    EXPECT_EQ( m.getElement( 7, 6 ), 0 );
-    EXPECT_EQ( m.getElement( 7, 7 ), 0 );
-    EXPECT_EQ( m.getElement( 7, 8 ), 0 );
-    EXPECT_EQ( m.getElement( 7, 9 ), 0 );
-
-    EXPECT_EQ( m.getElement( 8, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 8, 1 ), 1 );
-    EXPECT_EQ( m.getElement( 8, 2 ), 1 );
-    EXPECT_EQ( m.getElement( 8, 3 ), 1 );
-    EXPECT_EQ( m.getElement( 8, 4 ), 1 );
-    EXPECT_EQ( m.getElement( 8, 5 ), 1 );
-    EXPECT_EQ( m.getElement( 8, 6 ), 1 );
-    EXPECT_EQ( m.getElement( 8, 7 ), 1 );
-    EXPECT_EQ( m.getElement( 8, 8 ), 1 );
-    EXPECT_EQ( m.getElement( 8, 9 ), 1 );
-
-    EXPECT_EQ( m.getElement( 9, 0 ), 1 );
-    EXPECT_EQ( m.getElement( 9, 1 ), 1 );
-    EXPECT_EQ( m.getElement( 9, 2 ), 1 );
-    EXPECT_EQ( m.getElement( 9, 3 ), 1 );
-    EXPECT_EQ( m.getElement( 9, 4 ), 1 );
-    EXPECT_EQ( m.getElement( 9, 5 ), 1 );
-    EXPECT_EQ( m.getElement( 9, 6 ), 1 );
-    EXPECT_EQ( m.getElement( 9, 7 ), 1 );
-    EXPECT_EQ( m.getElement( 9, 8 ), 1 );
-    EXPECT_EQ( m.getElement( 9, 9 ), 1 );
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  1  0  1  0  1  0  0  0  \
+    *    |  1  1  1  0  0  0  0  0  0  0  |
+    *    |  1  1  1  1  1  1  1  1  0  0  |
+    *    |  1  1  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  1  1  1  1  1  1  1  1  1  |
+    *    \  1  1  1  1  1  1  1  1  1  1 /
+    */
+
+   const IndexType rows = 10;
+   const IndexType cols = 10;
+
+   Matrix m( rows, cols );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setElement( 0, 4 );
+   rowLengths.setElement( 1, 3 );
+   rowLengths.setElement( 2, 8 );
+   rowLengths.setElement( 3, 2 );
+   for( IndexType i = 4; i < rows - 2; i++ )
+       rowLengths.setElement( i, 1 );
+
+   rowLengths.setElement( 8, 10 );
+   rowLengths.setElement( 9, 10 );
+   m.setCompressedRowLengths( rowLengths );
+
+   for( IndexType i = 0; i < 4; i++ )
+       m.setElement( 0, 2 * i, 1 );
+
+   for( IndexType i = 0; i < 3; i++ )
+       m.setElement( 1, i, 1 );
+
+   for( IndexType i = 0; i < 8; i++ )
+       m.setElement( 2, i, 1 );
+
+   for( IndexType i = 0; i < 2; i++ )
+       m.setElement( 3, i, 1 );
+
+   for( IndexType i = 4; i < 8; i++ )
+       m.setElement( i, 0, 1 );
+
+   for( IndexType j = 8; j < rows; j++)
+       for( IndexType i = 0; i < cols; i++ )
+           m.setElement( j, i, 1 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 6 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 7 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 2, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 6, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 7, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 7, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 8, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 5 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 6 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 7 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 8 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 9 ), 1 );
+
+   EXPECT_EQ( m.getElement( 9, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 5 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 6 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 7 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 8 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 9 ), 1 );
 }
 
 template< typename Matrix >
@@ -648,7 +623,7 @@ void test_VectorProduct()
 
    m_1.setElement( 1, 1, 1 );      // 1st row
    m_1.setElement( 1, 3, 1 );
-   
+
    m_1.setElement( 2, 1, 1 );      // 2nd row
 
    m_1.setElement( 3, 2, 1 );      // 3rd row
@@ -690,12 +665,12 @@ void test_VectorProduct()
    for( IndexType i = 0; i < 3; i++ )   // 0th row
       m_2.setElement( 0, i, 1 );
 
-   m_2.setElement( 1, 3, 1 );      // 1st row
+   m_2.setElement( 1, 3, 1 );           // 1st row
 
    for( IndexType i = 0; i < 3; i++ )   // 2nd row
       m_2.setElement( 2, i, 1 );
 
-   for( IndexType i = 1; i < 2; i++ )       // 3rd row
+   for( IndexType i = 1; i < 2; i++ )   // 3rd row
       m_2.setElement( 3, i, 1 );
 
    VectorType inVector_2( m_cols_2 );
@@ -706,13 +681,11 @@ void test_VectorProduct()
 
    m_2.vectorProduct( inVector_2, outVector_2 );
 
-
    EXPECT_EQ( outVector_2.getElement( 0 ), 6 );
    EXPECT_EQ( outVector_2.getElement( 1 ), 2 );
    EXPECT_EQ( outVector_2.getElement( 2 ), 6 );
    EXPECT_EQ( outVector_2.getElement( 3 ), 2 );
 
-
    /*
     * Sets up the following 4x4 sparse matrix:
     *
@@ -943,32 +916,32 @@ void test_RowsReduction()
    for( IndexType i = 0; i < 3; i++ )   // 0th row
       m.setElement( 0, i, 1 );
 
-   m.setElement( 0, 4, 1 );           // 0th row
+   m.setElement( 0, 4, 1 );             // 0th row
    m.setElement( 0, 5, 1 );
 
-   m.setElement( 1, 1, 1 );           // 1st row
+   m.setElement( 1, 1, 1 );             // 1st row
    m.setElement( 1, 3, 1 );
 
-   for( IndexType i = 1; i < 3; i++ )            // 2nd row
+   for( IndexType i = 1; i < 3; i++ )   // 2nd row
       m.setElement( 2, i, 1 );
 
-   m.setElement( 2, 4, 1 );           // 2nd row
+   m.setElement( 2, 4, 1 );             // 2nd row
 
-   for( IndexType i = 1; i < 5; i++ )            // 3rd row
+   for( IndexType i = 1; i < 5; i++ )   // 3rd row
       m.setElement( 3, i, 1 );
 
-   m.setElement( 4, 1, 1 );           // 4th row
+   m.setElement( 4, 1, 1 );             // 4th row
 
-   for( IndexType i = 1; i < 7; i++ )            // 5th row
+   for( IndexType i = 1; i < 7; i++ )   // 5th row
       m.setElement( 5, i, 1 );
 
-   for( IndexType i = 0; i < 7; i++ )            // 6th row
+   for( IndexType i = 0; i < 7; i++ )   // 6th row
       m.setElement( 6, i, 1 );
 
-   for( IndexType i = 0; i < 8; i++ )            // 7th row
+   for( IndexType i = 0; i < 8; i++ )   // 7th row
        m.setElement( 7, i, 1 );
 
-   for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
+   for( IndexType i = 0; i < 7; i++ )   // 1s at the end of rows
       m.setElement( i, 7, 1);
 
    ////
@@ -1085,268 +1058,6 @@ void test_PerformSORIteration()
    EXPECT_EQ( xVector[ 3 ], 0.25 );
 }
 
-// This test is only for AdEllpack
-template< typename Matrix >
-void test_OperatorEquals()
-{
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-
-   if( std::is_same< DeviceType, TNL::Devices::Cuda >::value )
-      return;
-   else
-   {
-      using AdELL_host = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Host, IndexType >;
-      using AdELL_cuda = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Cuda, IndexType >;
-
-      /*
-       * Sets up the following 8x8 sparse matrix:
-       *
-       *    /  1  2  3  0  4  5  0  1 \   6
-       *    |  0  6  0  7  0  0  0  1 |   3
-       *    |  0  8  9  0 10  0  0  1 |   4
-       *    |  0 11 12 13 14  0  0  1 |   5
-       *    |  0 15  0  0  0  0  0  1 |   2
-       *    |  0 16 17 18 19 20 21  1 |   7
-       *    | 22 23 24 25 26 27 28  1 |   8
-       *    \ 29 30 31 32 33 34 35 36 /   8
-       */
-
-      const IndexType m_rows = 8;
-      const IndexType m_cols = 8;
-
-      AdELL_host m_host;
-
-      m_host.reset();
-      m_host.setDimensions( m_rows, m_cols );
-      typename AdELL_host::CompressedRowLengthsVector rowLengths;
-      rowLengths.setSize( m_rows );
-      rowLengths.setElement(0, 6);
-      rowLengths.setElement(1, 3);
-      rowLengths.setElement(2, 4);
-      rowLengths.setElement(3, 5);
-      rowLengths.setElement(4, 2);
-      rowLengths.setElement(5, 7);
-      rowLengths.setElement(6, 8);
-      rowLengths.setElement(7, 8);
-      m_host.setCompressedRowLengths( rowLengths );
-
-      RealType value = 1;
-      for( IndexType i = 0; i < 3; i++ )   // 0th row
-          m_host.setElement( 0, i, value++ );
-
-      m_host.setElement( 0, 4, value++ );           // 0th row
-      m_host.setElement( 0, 5, value++ );
-
-      m_host.setElement( 1, 1, value++ );           // 1st row
-      m_host.setElement( 1, 3, value++ );
-
-      for( IndexType i = 1; i < 3; i++ )            // 2nd row
-          m_host.setElement( 2, i, value++ );
-
-      m_host.setElement( 2, 4, value++ );           // 2nd row
-
-
-      for( IndexType i = 1; i < 5; i++ )            // 3rd row
-          m_host.setElement( 3, i, value++ );
-
-      m_host.setElement( 4, 1, value++ );           // 4th row
-
-      for( IndexType i = 1; i < 7; i++ )            // 5th row
-          m_host.setElement( 5, i, value++ );
-
-      for( IndexType i = 0; i < 7; i++ )            // 6th row
-          m_host.setElement( 6, i, value++ );
-
-      for( IndexType i = 0; i < 8; i++ )            // 7th row
-          m_host.setElement( 7, i, value++ );
-
-      for( IndexType i = 0; i < 7; i++ )            // 1s at the end or rows: 5, 6
-          m_host.setElement( i, 7, 1);
-
-      EXPECT_EQ( m_host.getElement( 0, 0 ),  1 );
-      EXPECT_EQ( m_host.getElement( 0, 1 ),  2 );
-      EXPECT_EQ( m_host.getElement( 0, 2 ),  3 );
-      EXPECT_EQ( m_host.getElement( 0, 3 ),  0 );
-      EXPECT_EQ( m_host.getElement( 0, 4 ),  4 );
-      EXPECT_EQ( m_host.getElement( 0, 5 ),  5 );
-      EXPECT_EQ( m_host.getElement( 0, 6 ),  0 );
-      EXPECT_EQ( m_host.getElement( 0, 7 ),  1 );
-
-      EXPECT_EQ( m_host.getElement( 1, 0 ),  0 );
-      EXPECT_EQ( m_host.getElement( 1, 1 ),  6 );
-      EXPECT_EQ( m_host.getElement( 1, 2 ),  0 );
-      EXPECT_EQ( m_host.getElement( 1, 3 ),  7 );
-      EXPECT_EQ( m_host.getElement( 1, 4 ),  0 );
-      EXPECT_EQ( m_host.getElement( 1, 5 ),  0 );
-      EXPECT_EQ( m_host.getElement( 1, 6 ),  0 );
-      EXPECT_EQ( m_host.getElement( 1, 7 ),  1 );
-
-      EXPECT_EQ( m_host.getElement( 2, 0 ),  0 );
-      EXPECT_EQ( m_host.getElement( 2, 1 ),  8 );
-      EXPECT_EQ( m_host.getElement( 2, 2 ),  9 );
-      EXPECT_EQ( m_host.getElement( 2, 3 ),  0 );
-      EXPECT_EQ( m_host.getElement( 2, 4 ), 10 );
-      EXPECT_EQ( m_host.getElement( 2, 5 ),  0 );
-      EXPECT_EQ( m_host.getElement( 2, 6 ),  0 );
-      EXPECT_EQ( m_host.getElement( 2, 7 ),  1 );
-
-      EXPECT_EQ( m_host.getElement( 3, 0 ),  0 );
-      EXPECT_EQ( m_host.getElement( 3, 1 ), 11 );
-      EXPECT_EQ( m_host.getElement( 3, 2 ), 12 );
-      EXPECT_EQ( m_host.getElement( 3, 3 ), 13 );
-      EXPECT_EQ( m_host.getElement( 3, 4 ), 14 );
-      EXPECT_EQ( m_host.getElement( 3, 5 ),  0 );
-      EXPECT_EQ( m_host.getElement( 3, 6 ),  0 );
-      EXPECT_EQ( m_host.getElement( 3, 7 ),  1 );
-
-      EXPECT_EQ( m_host.getElement( 4, 0 ),  0 );
-      EXPECT_EQ( m_host.getElement( 4, 1 ), 15 );
-      EXPECT_EQ( m_host.getElement( 4, 2 ),  0 );
-      EXPECT_EQ( m_host.getElement( 4, 3 ),  0 );
-      EXPECT_EQ( m_host.getElement( 4, 4 ),  0 );
-      EXPECT_EQ( m_host.getElement( 4, 5 ),  0 );
-      EXPECT_EQ( m_host.getElement( 4, 6 ),  0 );
-      EXPECT_EQ( m_host.getElement( 4, 7 ),  1 );
-
-      EXPECT_EQ( m_host.getElement( 5, 0 ),  0 );
-      EXPECT_EQ( m_host.getElement( 5, 1 ), 16 );
-      EXPECT_EQ( m_host.getElement( 5, 2 ), 17 );
-      EXPECT_EQ( m_host.getElement( 5, 3 ), 18 );
-      EXPECT_EQ( m_host.getElement( 5, 4 ), 19 );
-      EXPECT_EQ( m_host.getElement( 5, 5 ), 20 );
-      EXPECT_EQ( m_host.getElement( 5, 6 ), 21 );
-      EXPECT_EQ( m_host.getElement( 5, 7 ),  1 );
-
-      EXPECT_EQ( m_host.getElement( 6, 0 ), 22 );
-      EXPECT_EQ( m_host.getElement( 6, 1 ), 23 );
-      EXPECT_EQ( m_host.getElement( 6, 2 ), 24 );
-      EXPECT_EQ( m_host.getElement( 6, 3 ), 25 );
-      EXPECT_EQ( m_host.getElement( 6, 4 ), 26 );
-      EXPECT_EQ( m_host.getElement( 6, 5 ), 27 );
-      EXPECT_EQ( m_host.getElement( 6, 6 ), 28 );
-      EXPECT_EQ( m_host.getElement( 6, 7 ),  1 );
-
-      EXPECT_EQ( m_host.getElement( 7, 0 ), 29 );
-      EXPECT_EQ( m_host.getElement( 7, 1 ), 30 );
-      EXPECT_EQ( m_host.getElement( 7, 2 ), 31 );
-      EXPECT_EQ( m_host.getElement( 7, 3 ), 32 );
-      EXPECT_EQ( m_host.getElement( 7, 4 ), 33 );
-      EXPECT_EQ( m_host.getElement( 7, 5 ), 34 );
-      EXPECT_EQ( m_host.getElement( 7, 6 ), 35 );
-      EXPECT_EQ( m_host.getElement( 7, 7 ), 36 );
-
-      AdELL_cuda m_cuda;
-
-      // Copy the host matrix into the cuda matrix
-      m_cuda = m_host;
-
-      // Reset the host matrix
-      m_host.reset();
-
-      // Copy the cuda matrix back into the host matrix
-      m_host = m_cuda;
-
-      // Check the newly created double-copy host matrix
-      EXPECT_EQ( m_host.getElement( 0, 0 ),  1 );
-      EXPECT_EQ( m_host.getElement( 0, 1 ),  2 );
-      EXPECT_EQ( m_host.getElement( 0, 2 ),  3 );
-      EXPECT_EQ( m_host.getElement( 0, 3 ),  0 );
-      EXPECT_EQ( m_host.getElement( 0, 4 ),  4 );
-      EXPECT_EQ( m_host.getElement( 0, 5 ),  5 );
-      EXPECT_EQ( m_host.getElement( 0, 6 ),  0 );
-      EXPECT_EQ( m_host.getElement( 0, 7 ),  1 );
-
-      EXPECT_EQ( m_host.getElement( 1, 0 ),  0 );
-      EXPECT_EQ( m_host.getElement( 1, 1 ),  6 );
-      EXPECT_EQ( m_host.getElement( 1, 2 ),  0 );
-      EXPECT_EQ( m_host.getElement( 1, 3 ),  7 );
-      EXPECT_EQ( m_host.getElement( 1, 4 ),  0 );
-      EXPECT_EQ( m_host.getElement( 1, 5 ),  0 );
-      EXPECT_EQ( m_host.getElement( 1, 6 ),  0 );
-      EXPECT_EQ( m_host.getElement( 1, 7 ),  1 );
-
-      EXPECT_EQ( m_host.getElement( 2, 0 ),  0 );
-      EXPECT_EQ( m_host.getElement( 2, 1 ),  8 );
-      EXPECT_EQ( m_host.getElement( 2, 2 ),  9 );
-      EXPECT_EQ( m_host.getElement( 2, 3 ),  0 );
-      EXPECT_EQ( m_host.getElement( 2, 4 ), 10 );
-      EXPECT_EQ( m_host.getElement( 2, 5 ),  0 );
-      EXPECT_EQ( m_host.getElement( 2, 6 ),  0 );
-      EXPECT_EQ( m_host.getElement( 2, 7 ),  1 );
-
-      EXPECT_EQ( m_host.getElement( 3, 0 ),  0 );
-      EXPECT_EQ( m_host.getElement( 3, 1 ), 11 );
-      EXPECT_EQ( m_host.getElement( 3, 2 ), 12 );
-      EXPECT_EQ( m_host.getElement( 3, 3 ), 13 );
-      EXPECT_EQ( m_host.getElement( 3, 4 ), 14 );
-      EXPECT_EQ( m_host.getElement( 3, 5 ),  0 );
-      EXPECT_EQ( m_host.getElement( 3, 6 ),  0 );
-      EXPECT_EQ( m_host.getElement( 3, 7 ),  1 );
-
-      EXPECT_EQ( m_host.getElement( 4, 0 ),  0 );
-      EXPECT_EQ( m_host.getElement( 4, 1 ), 15 );
-      EXPECT_EQ( m_host.getElement( 4, 2 ),  0 );
-      EXPECT_EQ( m_host.getElement( 4, 3 ),  0 );
-      EXPECT_EQ( m_host.getElement( 4, 4 ),  0 );
-      EXPECT_EQ( m_host.getElement( 4, 5 ),  0 );
-      EXPECT_EQ( m_host.getElement( 4, 6 ),  0 );
-      EXPECT_EQ( m_host.getElement( 4, 7 ),  1 );
-
-      EXPECT_EQ( m_host.getElement( 5, 0 ),  0 );
-      EXPECT_EQ( m_host.getElement( 5, 1 ), 16 );
-      EXPECT_EQ( m_host.getElement( 5, 2 ), 17 );
-      EXPECT_EQ( m_host.getElement( 5, 3 ), 18 );
-      EXPECT_EQ( m_host.getElement( 5, 4 ), 19 );
-      EXPECT_EQ( m_host.getElement( 5, 5 ), 20 );
-      EXPECT_EQ( m_host.getElement( 5, 6 ), 21 );
-      EXPECT_EQ( m_host.getElement( 5, 7 ),  1 );
-
-      EXPECT_EQ( m_host.getElement( 6, 0 ), 22 );
-      EXPECT_EQ( m_host.getElement( 6, 1 ), 23 );
-      EXPECT_EQ( m_host.getElement( 6, 2 ), 24 );
-      EXPECT_EQ( m_host.getElement( 6, 3 ), 25 );
-      EXPECT_EQ( m_host.getElement( 6, 4 ), 26 );
-      EXPECT_EQ( m_host.getElement( 6, 5 ), 27 );
-      EXPECT_EQ( m_host.getElement( 6, 6 ), 28 );
-      EXPECT_EQ( m_host.getElement( 6, 7 ),  1 );
-
-      EXPECT_EQ( m_host.getElement( 7, 0 ), 29 );
-      EXPECT_EQ( m_host.getElement( 7, 1 ), 30 );
-      EXPECT_EQ( m_host.getElement( 7, 2 ), 31 );
-      EXPECT_EQ( m_host.getElement( 7, 3 ), 32 );
-      EXPECT_EQ( m_host.getElement( 7, 4 ), 33 );
-      EXPECT_EQ( m_host.getElement( 7, 5 ), 34 );
-      EXPECT_EQ( m_host.getElement( 7, 6 ), 35 );
-      EXPECT_EQ( m_host.getElement( 7, 7 ), 36 );
-
-      // Try vectorProduct with copied cuda matrix to see if it works correctly.
-      using VectorType = TNL::Containers::Vector< RealType, TNL::Devices::Cuda, IndexType >;
-
-      VectorType inVector;
-      inVector.setSize( m_cols );
-      for( IndexType i = 0; i < inVector.getSize(); i++ )
-          inVector.setElement( i, 2 );
-
-      VectorType outVector;
-      outVector.setSize( m_rows );
-      for( IndexType j = 0; j < outVector.getSize(); j++ )
-          outVector.setElement( j, 0 );
-
-      m_cuda.vectorProduct( inVector, outVector );
-
-      EXPECT_EQ( outVector.getElement( 0 ),  32 );
-      EXPECT_EQ( outVector.getElement( 1 ),  28 );
-      EXPECT_EQ( outVector.getElement( 2 ),  56 );
-      EXPECT_EQ( outVector.getElement( 3 ), 102 );
-      EXPECT_EQ( outVector.getElement( 4 ),  32 );
-      EXPECT_EQ( outVector.getElement( 5 ), 224 );
-      EXPECT_EQ( outVector.getElement( 6 ), 352 );
-      EXPECT_EQ( outVector.getElement( 7 ), 520 );
-   }
-}
-
 template< typename Matrix >
 void test_SaveAndLoad( const char* filename )
 {
@@ -1363,148 +1074,144 @@ void test_SaveAndLoad( const char* filename )
     *    \  0  1  1  1 /
     */
 
-    const IndexType m_rows = 4;
-    const IndexType m_cols = 4;
+   const IndexType m_rows = 4;
+   const IndexType m_cols = 4;
 
-    Matrix savedMatrix;
-    savedMatrix.reset();
-    savedMatrix.setDimensions( m_rows, m_cols );
-    typename Matrix::CompressedRowLengthsVector rowLengths;
-    rowLengths.setSize( m_rows );
-    rowLengths.setValue( 3 );
-    savedMatrix.setCompressedRowLengths( rowLengths );
+   Matrix savedMatrix( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( m_rows );
+   rowLengths.setValue( 3 );
+   savedMatrix.setCompressedRowLengths( rowLengths );
 
-    for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
-        savedMatrix.setElement( 0, i, 1 );
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
+       savedMatrix.setElement( 0, i, 1 );
 
-    savedMatrix.setElement( 1, 1, 1 );
-    savedMatrix.setElement( 1, 3, 1 );      // 1st row
+   savedMatrix.setElement( 1, 1, 1 );
+   savedMatrix.setElement( 1, 3, 1 );            // 1st row
 
-    for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
-        savedMatrix.setElement( 2, i, 1 );
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
+       savedMatrix.setElement( 2, i, 1 );
 
-    for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
-        savedMatrix.setElement( 3, i, 1 );
+   for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
+       savedMatrix.setElement( 3, i, 1 );
 
-    ASSERT_NO_THROW( savedMatrix.save( filename ) );
+   ASSERT_NO_THROW( savedMatrix.save( filename ) );
 
-    Matrix loadedMatrix;
-    loadedMatrix.reset();
-    loadedMatrix.setDimensions( m_rows, m_cols );
-    typename Matrix::CompressedRowLengthsVector rowLengths2;
-    rowLengths2.setSize( m_rows );
-    rowLengths2.setValue( 3 );
-    loadedMatrix.setCompressedRowLengths( rowLengths2 );
+   Matrix loadedMatrix;
+   loadedMatrix.reset();
+   loadedMatrix.setDimensions( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths2;
+   rowLengths2.setSize( m_rows );
+   rowLengths2.setValue( 3 );
+   loadedMatrix.setCompressedRowLengths( rowLengths2 );
 
 
-    ASSERT_NO_THROW( loadedMatrix.load( filename ) );
+   ASSERT_NO_THROW( loadedMatrix.load( filename ) );
 
 
-    EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
 
-    EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
 
-    EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
 
-    EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
 
-    EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
-    EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  1 );
-    EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  1 );
-    EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
 
-    EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  0 );
-    EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  1 );
-    EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  0 );
-    EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  1 );
 
-    EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  1 );
-    EXPECT_EQ( savedMatrix.getElement( 2, 1 ),  1 );
-    EXPECT_EQ( savedMatrix.getElement( 2, 2 ),  1 );
-    EXPECT_EQ( savedMatrix.getElement( 2, 3 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ),  0 );
 
-    EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
-    EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  1 );
-    EXPECT_EQ( savedMatrix.getElement( 3, 2 ),  1 );
-    EXPECT_EQ( savedMatrix.getElement( 3, 3 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ),  1 );
 
-    EXPECT_EQ( std::remove( filename ), 0 );
+   EXPECT_EQ( std::remove( filename ), 0 );
 }
 
 template< typename Matrix >
 void test_Print()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-
-/*
- * Sets up the following 5x4 sparse matrix:
- *
- *    /  1  1  1  0 \
- *    |  0  0  0  1 |
- *    |  1  1  1  0 |
- *    |  0  1  1  1 |
- *    \  0  0  1  1 /
- */
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    const IndexType m_rows = 5;
-    const IndexType m_cols = 4;
+   /*
+    * Sets up the following 5x4 sparse matrix:
+    *
+    *    /  1  1  1  0 \
+    *    |  0  0  0  1 |
+    *    |  1  1  1  0 |
+    *    |  0  1  1  1 |
+    *    \  0  0  1  1 /
+    */
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( m_rows, m_cols );
-    typename Matrix::CompressedRowLengthsVector rowLengths;
-    rowLengths.setSize( m_rows );
-    rowLengths.setValue( 3 );
-    m.setCompressedRowLengths( rowLengths );
+   const IndexType m_rows = 5;
+   const IndexType m_cols = 4;
 
-    RealType value = 1;
-    for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
-        m.setElement( 0, i, 1 );
+   Matrix m;
+   m.reset();
+   m.setDimensions( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( m_rows );
+   rowLengths.setValue( 3 );
+   m.setCompressedRowLengths( rowLengths );
 
-    m.setElement( 1, 3, 1 );      // 1st row
+   RealType value = 1;
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
+       m.setElement( 0, i, 1 );
 
-    for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
-        m.setElement( 2, i, 1 );
+   m.setElement( 1, 3, 1 );      // 1st row
 
-    for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
-        m.setElement( 3, i, 1 );
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
+       m.setElement( 2, i, 1 );
 
-    for( IndexType i = 2; i < m_cols; i++ )       // 4th row
-        m.setElement( 4, i, 1 );
+   for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
+       m.setElement( 3, i, 1 );
 
-    #include <sstream>
-    std::stringstream printed;
-    std::stringstream couted;
+   for( IndexType i = 2; i < m_cols; i++ )       // 4th row
+       m.setElement( 4, i, 1 );
 
-    //change the underlying buffer and save the old buffer
-    auto old_buf = std::cout.rdbuf(printed.rdbuf());
+   std::stringstream printed;
+   std::stringstream couted;
 
-    m.print( std::cout ); //all the std::cout goes to ss
+   //change the underlying buffer and save the old buffer
+   auto old_buf = std::cout.rdbuf(printed.rdbuf());
 
-    std::cout.rdbuf(old_buf); //reset
+   m.print( std::cout ); //all the std::cout goes to ss
 
-    couted << "Row: 0 ->  Col:0->1	 Col:1->1	 Col:2->1\t\n"
-               "Row: 1 ->  Col:3->1\t\n"
-               "Row: 2 ->  Col:0->1	 Col:1->1	 Col:2->1\t\n"
-               "Row: 3 ->  Col:1->1	 Col:2->1	 Col:3->1\t\n"
-               "Row: 4 ->  Col:2->1	 Col:3->1\t\n";
+   std::cout.rdbuf(old_buf); //reset
 
+   couted << "Row: 0 ->  Col:0->1	 Col:1->1	 Col:2->1\t\n"
+              "Row: 1 ->  Col:3->1\t\n"
+              "Row: 2 ->  Col:0->1	 Col:1->1	 Col:2->1\t\n"
+              "Row: 3 ->  Col:1->1	 Col:2->1	 Col:3->1\t\n"
+              "Row: 4 ->  Col:2->1	 Col:3->1\t\n";
 
-    EXPECT_EQ( printed.str(), couted.str() );
+   EXPECT_EQ( printed.str(), couted.str() );
 }
 
 #endif
-- 
GitLab


From 9ee9e37674d0a4ff7d341f97b7c569b206a55569 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Tue, 4 Feb 2020 21:23:45 +0100
Subject: [PATCH 130/179] Added binary sparse matrix unit tests for Ellpack and
 SlicedEllpack.

---
 ...Test.cu => BinarySparseMatrixTest_CSR.cpp} |   4 +-
 ...Test.cpp => BinarySparseMatrixTest_CSR.cu} |   4 +-
 ...rixTest.h => BinarySparseMatrixTest_CSR.h} |  28 ++--
 .../BinarySparseMatrixTest_Ellpack.cpp        |  11 ++
 .../BinarySparseMatrixTest_Ellpack.cu         |  11 ++
 .../Matrices/BinarySparseMatrixTest_Ellpack.h | 146 ++++++++++++++++++
 .../BinarySparseMatrixTest_SlicedEllpack.cpp  |  11 ++
 .../BinarySparseMatrixTest_SlicedEllpack.cu   |  11 ++
 .../BinarySparseMatrixTest_SlicedEllpack.h    | 146 ++++++++++++++++++
 src/UnitTests/Matrices/CMakeLists.txt         |  28 +++-
 .../Matrices/SparseMatrixTest_CSR.cu          |   2 +-
 src/UnitTests/Matrices/SparseMatrixTest_CSR.h |   2 +-
 .../Matrices/SparseMatrixTest_Ellpack.cu      |   2 +-
 .../Matrices/SparseMatrixTest_Ellpack.h       |   2 +-
 .../SparseMatrixTest_SlicedEllpack.cu         |   2 +-
 15 files changed, 381 insertions(+), 29 deletions(-)
 rename src/UnitTests/Matrices/{BinarySparseMatrixTest.cu => BinarySparseMatrixTest_CSR.cpp} (78%)
 rename src/UnitTests/Matrices/{BinarySparseMatrixTest.cpp => BinarySparseMatrixTest_CSR.cu} (78%)
 rename src/UnitTests/Matrices/{BinarySparseMatrixTest.h => BinarySparseMatrixTest_CSR.h} (87%)
 create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cpp
 create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cu
 create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h
 create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cpp
 create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cu
 create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h

diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.cu b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cpp
similarity index 78%
rename from src/UnitTests/Matrices/BinarySparseMatrixTest.cu
rename to src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cpp
index 916f14360..b1f489105 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest.cu
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cpp
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          BinarySparseMatrixTest.cu -  description
+                          BinarySparseMatrixTest_CSR.cpp -  description
                              -------------------
     begin                : Jan 30, 2020
     copyright            : (C) 2020 by Tomas Oberhuber et al.
@@ -8,4 +8,4 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include "BinarySparseMatrixTest.h"
\ No newline at end of file
+#include "BinarySparseMatrixTest_CSR.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.cpp b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cu
similarity index 78%
rename from src/UnitTests/Matrices/BinarySparseMatrixTest.cpp
rename to src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cu
index ea7b8d3c9..496bdde1b 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest.cpp
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cu
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          BinarySparseMatrixTest.cpp -  description
+                          BinarySparseMatrixTest_CSR.cu -  description
                              -------------------
     begin                : Jan 30, 2020
     copyright            : (C) 2020 by Tomas Oberhuber et al.
@@ -8,4 +8,4 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include "BinarySparseMatrixTest.h"
\ No newline at end of file
+#include "BinarySparseMatrixTest_CSR.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h
similarity index 87%
rename from src/UnitTests/Matrices/BinarySparseMatrixTest.h
rename to src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h
index cb0d0bab5..9cd52741a 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest.h
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          BinarySparseMatrixTest.h -  description
+                          BinarySparseMatrixTest_CSR.h -  description
                              -------------------
     begin                : Jan 30, 2020
     copyright            : (C) 2020 by Tomas Oberhuber et al.
@@ -20,7 +20,7 @@
 
 // test fixture for typed tests
 template< typename Matrix >
-class CSRMatrixTest : public ::testing::Test
+class BinaryMatrixTest_CSR : public ::testing::Test
 {
 protected:
    using CSRMatrixType = Matrix;
@@ -57,37 +57,37 @@ using CSRMatrixTypes = ::testing::Types
 #endif
 >;
 
-TYPED_TEST_SUITE( CSRMatrixTest, CSRMatrixTypes);
+TYPED_TEST_SUITE( BinaryMatrixTest_CSR, CSRMatrixTypes);
 
-TYPED_TEST( CSRMatrixTest, setDimensionsTest )
+TYPED_TEST( BinaryMatrixTest_CSR, setDimensionsTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
     test_SetDimensions< CSRMatrixType >();
 }
 
-TYPED_TEST( CSRMatrixTest, setCompressedRowLengthsTest )
+TYPED_TEST( BinaryMatrixTest_CSR, setCompressedRowLengthsTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
     test_SetCompressedRowLengths< CSRMatrixType >();
 }
 
-TYPED_TEST( CSRMatrixTest, setLikeTest )
+TYPED_TEST( BinaryMatrixTest_CSR, setLikeTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
     test_SetLike< CSRMatrixType, CSRMatrixType >();
 }
 
-TYPED_TEST( CSRMatrixTest, resetTest )
+TYPED_TEST( BinaryMatrixTest_CSR, resetTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
     test_Reset< CSRMatrixType >();
 }
 
-TYPED_TEST( CSRMatrixTest, getRowTest )
+TYPED_TEST( BinaryMatrixTest_CSR, getRowTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
@@ -95,35 +95,35 @@ TYPED_TEST( CSRMatrixTest, getRowTest )
 }
 
 
-TYPED_TEST( CSRMatrixTest, setElementTest )
+TYPED_TEST( BinaryMatrixTest_CSR, setElementTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
     test_SetElement< CSRMatrixType >();
 }
 
-TYPED_TEST( CSRMatrixTest, vectorProductTest )
+TYPED_TEST( BinaryMatrixTest_CSR, vectorProductTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
     test_VectorProduct< CSRMatrixType >();
 }
 
-TYPED_TEST( CSRMatrixTest, rowsReduction )
+TYPED_TEST( BinaryMatrixTest_CSR, rowsReduction )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
     test_RowsReduction< CSRMatrixType >();
 }
 
-TYPED_TEST( CSRMatrixTest, saveAndLoadTest )
+TYPED_TEST( BinaryMatrixTest_CSR, saveAndLoadTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
-    test_SaveAndLoad< CSRMatrixType >( "test_BinarySparseMatrixTest" );
+    test_SaveAndLoad< CSRMatrixType >( "test_BinarySparseMatrixTest_CSR" );
 }
 
-TYPED_TEST( CSRMatrixTest, printTest )
+TYPED_TEST( BinaryMatrixTest_CSR, printTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cpp b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cpp
new file mode 100644
index 000000000..b1d5d71cf
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          BinarySparseMatrixTest_Ellpack.cpp -  description
+                             -------------------
+    begin                : Jan 30, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "BinarySparseMatrixTest_Ellpack.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cu b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cu
new file mode 100644
index 000000000..8d075f1cf
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          BinarySparseMatrixTest_Ellpack.cu -  description
+                             -------------------
+    begin                : Jan 30, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "BinarySparseMatrixTest_Ellpack.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h
new file mode 100644
index 000000000..708bd85f0
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h
@@ -0,0 +1,146 @@
+/***************************************************************************
+                          BinarySparseMatrixTest_Ellpack.h -  description
+                             -------------------
+    begin                : Jan 30, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Matrices/SparseMatrix.h>
+
+
+#include "BinarySparseMatrixTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+// test fixture for typed tests
+template< typename Matrix >
+class BinaryMatrixTest_Ellpack : public ::testing::Test
+{
+protected:
+   using EllpackMatrixType = Matrix;
+};
+
+////
+// Row-major format is used for the host system
+template< typename Device, typename Index, typename IndexAlocator >
+using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAlocator, true, 32 >;
+
+
+////
+// Column-major format is used for GPUs
+template< typename Device, typename Index, typename IndexAllocator >
+using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator, false, 32 >;
+
+// types for which MatrixTest is instantiated
+using EllpackMatrixTypes = ::testing::Types
+<
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >
+#ifdef HAVE_CUDA
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >
+#endif
+>;
+
+TYPED_TEST_SUITE( BinaryMatrixTest_Ellpack, EllpackMatrixTypes);
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, setDimensionsTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SetDimensions< EllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, setCompressedRowLengthsTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SetCompressedRowLengths< EllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, setLikeTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SetLike< EllpackMatrixType, EllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, resetTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_Reset< EllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, getRowTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_GetRow< EllpackMatrixType >();
+}
+
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, setElementTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SetElement< EllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, vectorProductTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_VectorProduct< EllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, rowsReduction )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_RowsReduction< EllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, saveAndLoadTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SaveAndLoad< EllpackMatrixType >( "test_BinarySparseMatrixTest_Ellpack" );
+}
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, printTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_Print< EllpackMatrixType >();
+}
+
+#endif
+
+#include "../main.h"
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cpp b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cpp
new file mode 100644
index 000000000..7046d8156
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          BinarySparseMatrixTest_SlicedEllpack.cpp -  description
+                             -------------------
+    begin                : Jan 30, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "BinarySparseMatrixTest_SlicedEllpack.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cu b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cu
new file mode 100644
index 000000000..bb6829310
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          BinarySparseMatrixTest_SlicedEllpack.cu -  description
+                             -------------------
+    begin                : Jan 30, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "BinarySparseMatrixTest_SlicedEllpack.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h
new file mode 100644
index 000000000..7ebc25968
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h
@@ -0,0 +1,146 @@
+/***************************************************************************
+                          BinarySparseMatrixTest_SlicedEllpack.h -  description
+                             -------------------
+    begin                : Jan 30, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Segments/SlicedEllpack.h>
+#include <TNL/Matrices/SparseMatrix.h>
+
+
+#include "BinarySparseMatrixTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+// test fixture for typed tests
+template< typename Matrix >
+class BinaryMatrixTest_SlicedEllpack : public ::testing::Test
+{
+protected:
+   using SlicedEllpackMatrixType = Matrix;
+};
+
+////
+// Row-major format is used for the host system
+template< typename Device, typename Index, typename IndexAllocator >
+using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, true, 32 >;
+
+
+////
+// Column-major format is used for GPUs
+template< typename Device, typename Index, typename IndexAllocator >
+using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, false, 32 >;
+
+// types for which MatrixTest is instantiated
+using SlicedEllpackMatrixTypes = ::testing::Types
+<
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >
+#ifdef HAVE_CUDA
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >
+#endif
+>;
+
+TYPED_TEST_SUITE( BinaryMatrixTest_SlicedEllpack, SlicedEllpackMatrixTypes);
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setDimensionsTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SetDimensions< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setCompressedRowLengthsTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SetCompressedRowLengths< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setLikeTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SetLike< SlicedEllpackMatrixType, SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, resetTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_Reset< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, getRowTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_GetRow< SlicedEllpackMatrixType >();
+}
+
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setElementTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SetElement< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, vectorProductTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_VectorProduct< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, rowsReduction )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_RowsReduction< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, saveAndLoadTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SaveAndLoad< SlicedEllpackMatrixType >( "test_BinarySparseMatrixTest" );
+}
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, printTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_Print< SlicedEllpackMatrixType >();
+}
+
+#endif
+
+#include "../main.h"
diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index c4b2fabd3..60a01eaf5 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -25,8 +25,14 @@ IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
 
-   CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest BinarySparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( BinarySparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
+   CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest_CSR BinarySparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest_Ellpack BinarySparseMatrixTest_Ellpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest_SlicedEllpack BinarySparseMatrixTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
 
 ELSE(  BUILD_CUDA )
    ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp )
@@ -61,9 +67,17 @@ ELSE(  BUILD_CUDA )
    TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
 
-   ADD_EXECUTABLE( BinarySparseMatrixTest BinarySparseMatrixTest.cpp )
-   TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( BinarySparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
+   ADD_EXECUTABLE( BinarySparseMatrixTest_CSR BinarySparseMatrixTest_CSR.cpp )
+   TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( BinarySparseMatrixTest_Ellpack BinarySparseMatrixTest_Ellpack.cpp )
+   TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest_Ellpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( BinarySparseMatrixTest_SlicedEllpack BinarySparseMatrixTest_SlicedEllpack.cpp )
+   TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
 
 ENDIF( BUILD_CUDA )
 
@@ -75,7 +89,9 @@ ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixT
 ADD_TEST( SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( BinarySparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( BinarySparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( BinarySparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( BinarySparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
 
 if( ${BUILD_MPI} )
    if( BUILD_CUDA )
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu
index 771c74b9a..258ad2c53 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu
@@ -1 +1 @@
-#include "SparseMatrixTest_CSR_segments.h"
+#include "SparseMatrixTest_CSR.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
index 1e89d544a..781735e7f 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          SparseMatrixTest_CSR_segments.h -  description
+                          SparseMatrixTest_CSR.h -  description
                              -------------------
     begin                : Dec 2, 2019
     copyright            : (C) 2019 by Tomas Oberhuber et al.
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu
index 63219e9b0..c454706f0 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu
@@ -1 +1 @@
-#include "SparseMatrixTest_Ellpack_segments.h"
+#include "SparseMatrixTest_Ellpack.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
index 32678c2b0..9650105f6 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          SparseMatrixTest_Ellpack_segments.h -  description
+                          SparseMatrixTest_Ellpack.h -  description
                              -------------------
     begin                : Dec 3, 2019
     copyright            : (C) 2019 by Tomas Oberhuber et al.
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu
index a88301100..40e2e94b8 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu
@@ -1 +1 @@
-#include "SparseMatrixTest_SlicedEllpack_segments.h"
+#include "SparseMatrixTest_SlicedEllpack.h"
-- 
GitLab


From f0b9843967c5808feb13de2e83124d81f7c92c3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Wed, 5 Feb 2020 20:03:19 +0100
Subject: [PATCH 131/179] Added binary sparse matrix copy test.

---
 src/TNL/Matrices/SparseMatrixView.hpp         |   3 +-
 .../Matrices/BinarySparseMatrixCopyTest.cpp   |  11 +
 .../Matrices/BinarySparseMatrixCopyTest.cu    |  11 +
 .../Matrices/BinarySparseMatrixCopyTest.h     | 820 ++++++++++++++++++
 src/UnitTests/Matrices/CMakeLists.txt         |  10 +-
 src/UnitTests/Matrices/SparseMatrixCopyTest.h |   7 -
 6 files changed, 853 insertions(+), 9 deletions(-)
 create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cpp
 create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cu
 create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h

diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index afc21788a..16a8bc62f 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -263,7 +263,8 @@ addElement( const IndexType row,
       }
 
       this->columnIndexes.setElement( globalIdx, column );
-      this->values.setElement( globalIdx, value );
+      if( ! isBinary() )
+         this->values.setElement( globalIdx, value );
       return;
    }
 }
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cpp b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cpp
new file mode 100644
index 000000000..51d7c4ea9
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          BinarySparseMatrixCopyTest.cpp  -  description
+                             -------------------
+    begin                : Feb 5, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "BinarySparseMatrixCopyTest.h"
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cu b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cu
new file mode 100644
index 000000000..f29db9e96
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          BinarySparseMatrixCopyTest.cu  -  description
+                             -------------------
+    begin                : Feb 5, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "BinarySparseMatrixCopyTest.h"
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h
new file mode 100644
index 000000000..d8fefeed7
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h
@@ -0,0 +1,820 @@
+/***************************************************************************
+                          BinaryBinarySparseMatrixCopyTest.h -  description
+                             -------------------
+    begin                : Feb 5, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Matrices/Legacy/CSR.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
+
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Matrices/MatrixType.h>
+#include <TNL/Matrices/Dense.h>
+#include <TNL/Matrices/Tridiagonal.h>
+#include <TNL/Matrices/Multidiagonal.h>
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Containers/Segments/SlicedEllpack.h>
+
+template< typename Device, typename Index, typename IndexAllocator >
+using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >;
+
+template< typename Device, typename Index, typename IndexAllocator >
+using SlicedEllpackSegments = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >;
+
+using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >;
+using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >;
+using E_host   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, EllpackSegments >;
+using E_cuda   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, EllpackSegments >;
+using SE_host  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, SlicedEllpackSegments >;
+using SE_cuda  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, SlicedEllpackSegments >;
+
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+/*
+ * Sets up the following 10x6 sparse matrix:
+ *
+ *    /  1  1             \
+ *    |           1  1  1 |
+ *    |  1  1  1          |
+ *    |     1  1  1  1  1 |
+ *    |  1  1  1  1  1    |
+ *    |  1  1             |
+ *    |  1                |
+ *    |  1                |
+ *    |  1  1  1  1  1    |
+ *    \                 1 /
+ */
+template< typename Matrix >
+void setupUnevenRowSizeMatrix( Matrix& m )
+{
+    const int rows = 10;
+    const int cols = 6;
+    m.setDimensions( rows, cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( rows );
+    rowLengths.setValue( 5 );
+    rowLengths.setElement( 0, 2 );
+    rowLengths.setElement( 1,  3 );
+    rowLengths.setElement( 2,  3 );
+    rowLengths.setElement( 5,  2 );
+    rowLengths.setElement( 6,  1 );
+    rowLengths.setElement( 7,  1 );
+    rowLengths.setElement( 9,  1 );
+    m.setCompressedRowLengths( rowLengths );
+
+    for( int i = 0; i < cols - 4; i++ )  // 0th row
+        m.setElement( 0, i, 1 );
+
+    for( int i = 3; i < cols; i++ )      // 1st row
+        m.setElement( 1, i, 1 );
+
+    for( int i = 0; i < cols - 3; i++ )  // 2nd row
+        m.setElement( 2, i, 1 );
+
+    for( int i = 1; i < cols; i++ )      // 3rd row
+        m.setElement( 3, i, 1 );
+
+    for( int i = 0; i < cols - 1; i++ )  // 4th row
+        m.setElement( 4, i, 1 );
+
+    for( int i = 0; i < cols - 4; i++ )  // 5th row
+        m.setElement( 5, i, 1 );
+
+    m.setElement( 6, 0, 1 );   // 6th row
+
+    m.setElement( 7, 0, 1 );   // 7th row
+
+    for( int i = 0; i < cols - 1; i++ )  // 8th row
+        m.setElement( 8, i, 1 );
+
+    m.setElement( 9, 5, 1 );   // 9th row
+}
+
+template< typename Matrix >
+void checkUnevenRowSizeMatrix( Matrix& m )
+{
+   ASSERT_EQ( m.getRows(), 10 );
+   ASSERT_EQ( m.getColumns(), 6 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 5 ), 0);
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 5 ), 1 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 5 ), 1 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 6, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 7, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 7, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 8, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 9, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 9, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 9, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 9, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 9, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 9, 5 ), 1 );
+}
+
+/*
+ * Sets up the following 7x6 sparse matrix:
+ *
+ *    /              1  1 \
+ *    |           1  1  1 |
+ *    |        1  1  1    |
+ *    |     1  1  1       |
+ *    |  1  1  1          |
+ *    |  1  1             |
+ *    \  1                /
+ */
+template< typename Matrix >
+void setupAntiTriDiagMatrix( Matrix& m )
+{
+    const int rows = 7;
+    const int cols = 6;
+    m.reset();
+    m.setDimensions( rows, cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( rows );
+    rowLengths.setValue( 3 );
+    rowLengths.setElement( 0, 4);
+    rowLengths.setElement( 1,  4 );
+    m.setCompressedRowLengths( rowLengths );
+
+    for( int i = 0; i < rows; i++ )
+        for( int j = cols - 1; j > 2; j-- )
+            if( j - i + 1 < cols && j - i + 1 >= 0 )
+                m.setElement( i, j - i + 1, 1 );
+}
+
+template< typename Matrix >
+void checkAntiTriDiagMatrix( Matrix& m )
+{
+   ASSERT_EQ( m.getRows(), 7 );
+   ASSERT_EQ( m.getColumns(), 6 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  1);
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  1 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  1 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  1 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  1 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  1 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  1 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  1 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  1 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  1 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  1 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  1 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  1 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+}
+
+/*
+ * Sets up the following 7x6 sparse matrix:
+ *
+ *    / 1  1             \
+ *    | 1  1  1          |
+ *    |    1  1  1       |
+ *    |       1  1  1    |
+ *    |          1  1  1 |
+ *    |             1  1 |
+ *    \                1 /
+ */
+template< typename Matrix >
+void setupTriDiagMatrix( Matrix& m )
+{
+   const int rows = 7;
+   const int cols = 6;
+   m.reset();
+   m.setDimensions( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setValue( 3 );
+   rowLengths.setElement( 0 , 4 );
+   rowLengths.setElement( 1,  4 );
+   m.setCompressedRowLengths( rowLengths );
+
+   for( int i = 0; i < rows; i++ )
+      for( int j = 0; j < 3; j++ )
+         if( i + j - 1 >= 0 && i + j - 1 < cols )
+            m.setElement( i, i + j - 1, 1 );
+}
+
+template< typename Matrix >
+void checkTriDiagMatrix( Matrix& m )
+{
+   ASSERT_EQ( m.getRows(), 7 );
+   ASSERT_EQ( m.getColumns(), 6 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 1 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 5, 5 ), 1 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 1 );
+}
+
+template< typename Matrix1, typename Matrix2 >
+void testCopyAssignment()
+{
+   {
+      SCOPED_TRACE("Tri Diagonal Matrix");
+
+      Matrix1 triDiag1;
+      setupTriDiagMatrix( triDiag1 );
+      checkTriDiagMatrix( triDiag1 );
+
+      Matrix2 triDiag2;
+      triDiag2 = triDiag1;
+      checkTriDiagMatrix( triDiag1 );
+      checkTriDiagMatrix( triDiag2 );
+   }
+   {
+      SCOPED_TRACE("Anti Tri Diagonal Matrix");
+      Matrix1 antiTriDiag1;
+      setupAntiTriDiagMatrix( antiTriDiag1 );
+      checkAntiTriDiagMatrix( antiTriDiag1 );
+
+      Matrix2 antiTriDiag2;
+      antiTriDiag2 = antiTriDiag1;
+      checkAntiTriDiagMatrix( antiTriDiag2 );
+   }
+   {
+      SCOPED_TRACE("Uneven Row Size Matrix");
+      Matrix1 unevenRowSize1;
+      setupUnevenRowSizeMatrix( unevenRowSize1 );
+      checkUnevenRowSizeMatrix( unevenRowSize1 );
+
+      Matrix2 unevenRowSize2;
+      unevenRowSize2 = unevenRowSize1;
+
+      checkUnevenRowSizeMatrix( unevenRowSize2 );
+   }
+}
+
+template< typename Matrix1, typename Matrix2 >
+void testConversion()
+{
+   {
+        SCOPED_TRACE("Tri Diagonal Matrix");
+
+        Matrix1 triDiag1;
+        setupTriDiagMatrix( triDiag1 );
+        checkTriDiagMatrix( triDiag1 );
+
+        Matrix2 triDiag2;
+        triDiag2 = triDiag1;
+        checkTriDiagMatrix( triDiag2 );
+   }
+
+   {
+        SCOPED_TRACE("Anti Tri Diagonal Matrix");
+
+        Matrix1 antiTriDiag1;
+        setupAntiTriDiagMatrix( antiTriDiag1 );
+        checkAntiTriDiagMatrix( antiTriDiag1 );
+
+        Matrix2 antiTriDiag2;
+        antiTriDiag2 = antiTriDiag1;
+        checkAntiTriDiagMatrix( antiTriDiag2 );
+   }
+
+   {
+        SCOPED_TRACE("Uneven Row Size Matrix");
+        Matrix1 unevenRowSize1;
+        setupUnevenRowSizeMatrix( unevenRowSize1 );
+        checkUnevenRowSizeMatrix( unevenRowSize1 );
+
+        Matrix2 unevenRowSize2;
+        unevenRowSize2 = unevenRowSize1;
+        checkUnevenRowSizeMatrix( unevenRowSize2 );
+   }
+}
+
+template< typename Matrix >
+void tridiagonalMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >;
+   using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   TridiagonalHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( columns, i + 2 ); j++ )
+         hostMatrix.setElement( i, j, 1 );
+
+   Matrix matrix;
+   matrix = hostMatrix;
+   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 };
+
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( abs( i - j ) > 1 )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 1.0 );
+      }
+
+#ifdef HAVE_CUDA
+   TridiagonalCuda cudaMatrix( rows, columns );
+   cudaMatrix = hostMatrix;
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( abs( i - j ) > 1 )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 1.0 );
+      }
+#endif
+}
+
+template< typename Matrix >
+void multidiagonalMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >;
+   using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+   using DiagonalsShiftsType = typename MultidiagonalHost::DiagonalsShiftsType;
+   DiagonalsShiftsType diagonals{ -4, -2, 0, 1, 3, 5 };
+
+   const IndexType rows( 10 ), columns( 10 );
+   MultidiagonalHost hostMatrix( rows, columns, diagonals );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+         if( diagonals.containsValue( j - i ) )
+            hostMatrix.setElement( i, j, 1 );
+
+   Matrix matrix;
+   matrix = hostMatrix;
+   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 3, 4, 5, 5, 6, 5, 5, 4, 4, 3 };
+   /*std::cerr << "hostMatrix " << hostMatrix << std::endl;
+   std::cerr << "matrix " << matrix << std::endl;
+   std::cerr << "rowCapacities " << rowCapacities << std::endl;*/
+
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( diagonals.containsValue( j - i ) )
+            EXPECT_EQ( matrix.getElement( i, j ), 1.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+      }
+
+#ifdef HAVE_CUDA
+   MultidiagonalCuda cudaMatrix( rows, columns, diagonals );
+   cudaMatrix = hostMatrix;
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( diagonals.containsValue( j - i ) )
+            EXPECT_EQ( matrix.getElement( i, j ), 1.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+      }
+#endif
+}
+
+template< typename Matrix >
+void denseMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >;
+   using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   DenseHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j <= i; j++ )
+         hostMatrix( i, j ) = i + j;
+
+   Matrix matrix;
+   matrix = hostMatrix;
+   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 0, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 1.0 );
+      }
+
+#ifdef HAVE_CUDA
+   DenseCuda cudaMatrix( rows, columns );
+   cudaMatrix = hostMatrix;
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 1.0 );
+      }
+#endif
+}
+
+TEST( BinarySparseMatrixCopyTest, CSR_HostToHost )
+{
+   testCopyAssignment< CSR_host, CSR_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( BinarySparseMatrixCopyTest, CSR_HostToCuda )
+{
+   testCopyAssignment< CSR_host, CSR_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, CSR_CudaToHost )
+{
+   testCopyAssignment< CSR_cuda, CSR_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, CSR_CudaToCuda )
+{
+   testCopyAssignment< CSR_cuda, CSR_cuda >();
+}
+#endif
+
+
+TEST( BinarySparseMatrixCopyTest, Ellpack_HostToHost )
+{
+   testCopyAssignment< E_host, E_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( BinarySparseMatrixCopyTest, Ellpack_HostToCuda )
+{
+   testCopyAssignment< E_host, E_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, Ellpack_CudaToHost )
+{
+   testCopyAssignment< E_cuda, E_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, Ellpack_CudaToCuda )
+{
+   testCopyAssignment< E_cuda, E_cuda >();
+}
+#endif
+
+
+TEST( BinarySparseMatrixCopyTest, SlicedEllpack_HostToHost )
+{
+   testCopyAssignment< SE_host, SE_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( BinarySparseMatrixCopyTest, SlicedEllpack_HostToCuda )
+{
+   testCopyAssignment< SE_host, SE_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, SlicedEllpack_CudaToHost )
+{
+   testCopyAssignment< SE_cuda, SE_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, SlicedEllpack_CudaToCuda )
+{
+   testCopyAssignment< SE_cuda, SE_cuda >();
+}
+#endif
+
+////
+// Test of conversion between formats
+TEST( BinarySparseMatrixCopyTest, CSR_to_Ellpack_host )
+{
+   testConversion< CSR_host, E_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, Ellpack_to_CSR_host )
+{
+   testConversion< E_host, CSR_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, CSR_to_SlicedEllpack_host )
+{
+   testConversion< CSR_host, SE_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, SlicedEllpack_to_CSR_host )
+{
+   testConversion< SE_host, CSR_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, Ellpack_to_SlicedEllpack_host )
+{
+   testConversion< E_host, SE_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, SlicedEllpack_to_Ellpack_host )
+{
+   testConversion< SE_host, E_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( BinarySparseMatrixCopyTest, CSR_to_Ellpack_cuda )
+{
+   testConversion< CSR_cuda, E_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, Ellpack_to_CSR_cuda )
+{
+   testConversion< E_cuda, CSR_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, CSR_to_SlicedEllpack_cuda )
+{
+   testConversion< CSR_cuda, SE_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, SlicedEllpack_to_CSR_cuda )
+{
+   testConversion< SE_cuda, CSR_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, Ellpack_to_SlicedEllpack_cuda )
+{
+   testConversion< E_cuda, SE_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda )
+{
+   testConversion< SE_cuda, E_cuda >();
+}
+#endif
+
+////
+// Tridiagonal matrix assignment test
+TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_host )
+{
+   tridiagonalMatrixAssignment< CSR_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_host )
+{
+   tridiagonalMatrixAssignment< E_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_host )
+{
+   tridiagonalMatrixAssignment< SE_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_cuda )
+{
+   tridiagonalMatrixAssignment< CSR_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_cuda )
+{
+   tridiagonalMatrixAssignment< E_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_cuda )
+{
+   tridiagonalMatrixAssignment< SE_cuda >();
+}
+#endif // HAVE_CUDA
+
+////
+// Multidiagonal matrix assignment test
+TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_CSR_host )
+{
+   multidiagonalMatrixAssignment< CSR_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Ellpack_host )
+{
+   multidiagonalMatrixAssignment< E_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_SlicedEllpack_host )
+{
+   multidiagonalMatrixAssignment< SE_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_CSR_cuda )
+{
+   multidiagonalMatrixAssignment< CSR_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Ellpack_cuda )
+{
+   multidiagonalMatrixAssignment< E_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_SlicedEllpack_cuda )
+{
+   multidiagonalMatrixAssignment< SE_cuda >();
+}
+#endif // HAVE_CUDA
+
+////
+// Dense matrix assignment test
+TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_host )
+{
+   denseMatrixAssignment< CSR_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_Ellpack_host )
+{
+   denseMatrixAssignment< E_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_SlicedEllpack_host )
+{
+   denseMatrixAssignment< SE_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_cuda )
+{
+   denseMatrixAssignment< CSR_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_Ellpack_cuda )
+{
+   denseMatrixAssignment< E_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_SlicedEllpack_cuda )
+{
+   denseMatrixAssignment< SE_cuda >();
+}
+#endif // HAVE_CUDA
+
+#endif //HAVE_GTEST
+
+#include "../main.h"
diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index 60a01eaf5..8da67ef6b 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -34,6 +34,9 @@ IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest_SlicedEllpack BinarySparseMatrixTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
 
+   CUDA_ADD_EXECUTABLE( BinarySparseMatrixCopyTest BinarySparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BinarySparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
+
 ELSE(  BUILD_CUDA )
    ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp )
    TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
@@ -79,9 +82,12 @@ ELSE(  BUILD_CUDA )
    TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
 
+   ADD_EXECUTABLE( BinarySparseMatrixCopyTest BinarySparseMatrixCopyTest.cpp )
+   TARGET_COMPILE_OPTIONS( BinarySparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BinarySparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
+
 ENDIF( BUILD_CUDA )
 
-ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( TridiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
@@ -89,9 +95,11 @@ ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixT
 ADD_TEST( SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( BinarySparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( BinarySparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( BinarySparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( BinarySparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} )
 
 if( ${BUILD_MPI} )
    if( BUILD_CUDA )
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index 053f1e9fb..829c30677 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -21,13 +21,6 @@
 #include <TNL/Containers/Segments/Ellpack.h>
 #include <TNL/Containers/Segments/SlicedEllpack.h>
 
-/*using CSR_host = TNL::Matrices::CSR< int, TNL::Devices::Host, int >;
-using CSR_cuda = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >;
-using E_host = TNL::Matrices::Ellpack< int, TNL::Devices::Host, int >;
-using E_cuda = TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, int >;
-using SE_host = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, int, 2 >;
-using SE_cuda = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int, 2 >;*/
-
 template< typename Device, typename Index, typename IndexAllocator >
 using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >;
 
-- 
GitLab


From 3b631afcbb877516e42a9650b59470c368ea8365 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Thu, 6 Feb 2020 12:30:19 +0100
Subject: [PATCH 132/179] Fixed

---
 src/TNL/Matrices/SparseMatrix.hpp              |  6 ++++--
 .../Matrices/BinarySparseMatrixCopyTest.h      | 18 +++++++++---------
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 60f4695f0..992443434 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -590,7 +590,8 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >&
          {
             IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, rowLocalIndexes_view[ rowIdx ]++ );
             columns_view[ thisGlobalIdx ] = columnIdx;
-            values_view[ thisGlobalIdx ] = value;
+            if( ! isBinary() )
+               values_view[ thisGlobalIdx ] = value;
          }
       };
       matrix.forAllRows( f );
@@ -700,7 +701,8 @@ operator=( const RHSMatrix& matrix )
          {
             IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx++ );
             columns_view[ thisGlobalIdx ] = columnIndex;
-            values_view[ thisGlobalIdx ] = value;
+            if( ! isBinary() )
+               values_view[ thisGlobalIdx ] = value;
             rowLocalIndexes_view[ rowIdx ] = localIdx;
          }
       };
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h
index d8fefeed7..b901acbbd 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h
@@ -440,7 +440,7 @@ void tridiagonalMatrixAssignment()
    TridiagonalHost hostMatrix( rows, columns );
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( columns, i + 2 ); j++ )
-         hostMatrix.setElement( i, j, 1 );
+         hostMatrix.setElement( i, j, TNL::min( i + j, 1 ) );
 
    Matrix matrix;
    matrix = hostMatrix;
@@ -456,7 +456,7 @@ void tridiagonalMatrixAssignment()
          if( abs( i - j ) > 1 )
             EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
          else
-            EXPECT_EQ( matrix.getElement( i, j ), 1.0 );
+            EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) );
       }
 
 #ifdef HAVE_CUDA
@@ -471,7 +471,7 @@ void tridiagonalMatrixAssignment()
          if( abs( i - j ) > 1 )
             EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
          else
-            EXPECT_EQ( matrix.getElement( i, j ), 1.0 );
+            EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) );
       }
 #endif
 }
@@ -493,7 +493,7 @@ void multidiagonalMatrixAssignment()
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < columns; j++ )
          if( diagonals.containsValue( j - i ) )
-            hostMatrix.setElement( i, j, 1 );
+            hostMatrix.setElement( i, j, TNL::min( i + j, 1 ) );
 
    Matrix matrix;
    matrix = hostMatrix;
@@ -510,7 +510,7 @@ void multidiagonalMatrixAssignment()
       for( IndexType j = 0; j < columns; j++ )
       {
          if( diagonals.containsValue( j - i ) )
-            EXPECT_EQ( matrix.getElement( i, j ), 1.0 );
+            EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) );
          else
             EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
       }
@@ -525,7 +525,7 @@ void multidiagonalMatrixAssignment()
       for( IndexType j = 0; j < columns; j++ )
       {
          if( diagonals.containsValue( j - i ) )
-            EXPECT_EQ( matrix.getElement( i, j ), 1.0 );
+            EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) );
          else
             EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
       }
@@ -546,7 +546,7 @@ void denseMatrixAssignment()
    DenseHost hostMatrix( rows, columns );
    for( IndexType i = 0; i < columns; i++ )
       for( IndexType j = 0; j <= i; j++ )
-         hostMatrix( i, j ) = i + j;
+         hostMatrix( i, j ) = TNL::min( i + j, 1 );
 
    Matrix matrix;
    matrix = hostMatrix;
@@ -561,7 +561,7 @@ void denseMatrixAssignment()
          if( j > i )
             EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
          else
-            EXPECT_EQ( matrix.getElement( i, j ), 1.0 );
+            EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) );
       }
 
 #ifdef HAVE_CUDA
@@ -576,7 +576,7 @@ void denseMatrixAssignment()
          if( j > i )
             EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
          else
-            EXPECT_EQ( matrix.getElement( i, j ), 1.0 );
+            EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) );
       }
 #endif
 }
-- 
GitLab


From 1ac668ecd333fe990072b2a5bdf2dd71d80df4c8 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Fri, 7 Feb 2020 13:25:47 +0100
Subject: [PATCH 133/179] Refactoring SparseMatrixTest.

---
 src/UnitTests/Matrices/SparseMatrixTest.h | 2240 +++++++++------------
 1 file changed, 925 insertions(+), 1315 deletions(-)

diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h
index b6b5a368f..04a9b065f 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest.h
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          SparseMatrixTest_impl.h -  description
+                          SparseMatrixTest.h -  description
                              -------------------
     begin                : Nov 22, 2018
     copyright            : (C) 2018 by Tomas Oberhuber et al.
@@ -13,11 +13,7 @@
 #include <TNL/Math.h>
 #include <TNL/Algorithms/ParallelFor.h>
 #include <iostream>
-
-// Temporary, until test_OperatorEquals doesn't work for all formats.
-#include <TNL/Matrices/Legacy/ChunkedEllpack.h>
-#include <TNL/Matrices/Legacy/AdEllpack.h>
-#include <TNL/Matrices/Legacy/BiEllpack.h>
+#include <sstream>
 
 #ifdef HAVE_GTEST
 #include <gtest/gtest.h>
@@ -25,93 +21,91 @@
 template< typename MatrixHostFloat, typename MatrixHostInt >
 void host_test_GetType()
 {
-    bool testRan = false;
-    EXPECT_TRUE( testRan );
-    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
-    std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+   bool testRan = false;
+   EXPECT_TRUE( testRan );
+   std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+   std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
 }
 
 template< typename MatrixCudaFloat, typename MatrixCudaInt >
 void cuda_test_GetType()
 {
-    bool testRan = false;
-    EXPECT_TRUE( testRan );
-    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
-    std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+   bool testRan = false;
+   EXPECT_TRUE( testRan );
+   std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+   std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
 }
 
 template< typename Matrix >
 void test_SetDimensions()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    const IndexType rows = 9;
-    const IndexType cols = 8;
+   const IndexType rows = 9;
+   const IndexType cols = 8;
 
-    Matrix m;
-    m.setDimensions( rows, cols );
+   Matrix m;
+   m.setDimensions( rows, cols );
 
-    EXPECT_EQ( m.getRows(), 9 );
-    EXPECT_EQ( m.getColumns(), 8 );
+   EXPECT_EQ( m.getRows(), 9 );
+   EXPECT_EQ( m.getColumns(), 8 );
 }
 
 template< typename Matrix >
 void test_SetCompressedRowLengths()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    const IndexType rows = 10;
-    const IndexType cols = 11;
+   const IndexType rows = 10;
+   const IndexType cols = 11;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
-    typename Matrix::CompressedRowLengthsVector rowLengths;
-    rowLengths.setSize( rows );
-    rowLengths.setValue( 3 );
+   Matrix m( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths = 3;
 
-    IndexType rowLength = 1;
-    for( IndexType i = 2; i < rows; i++ )
-        rowLengths.setElement( i, rowLength++ );
+   IndexType rowLength = 1;
+   for( IndexType i = 2; i < rows; i++ )
+      rowLengths.setElement( i, rowLength++ );
 
-    m.setCompressedRowLengths( rowLengths );
+   m.setCompressedRowLengths( rowLengths );
 
-    // Insert values into the rows.
-    RealType value = 1;
+   // Insert values into the rows.
+   RealType value = 1;
 
-    for( IndexType i = 0; i < 3; i++ )      // 0th row
-        m.setElement( 0, i, value++ );
+   for( IndexType i = 0; i < 3; i++ )      // 0th row
+      m.setElement( 0, i, value++ );
 
-    for( IndexType i = 0; i < 3; i++ )      // 1st row
-        m.setElement( 1, i, value++ );
+   for( IndexType i = 0; i < 3; i++ )      // 1st row
+      m.setElement( 1, i, value++ );
 
-    for( IndexType i = 0; i < 1; i++ )      // 2nd row
-        m.setElement( 2, i, value++ );
+   for( IndexType i = 0; i < 1; i++ )      // 2nd row
+      m.setElement( 2, i, value++ );
 
-    for( IndexType i = 0; i < 2; i++ )      // 3rd row
-        m.setElement( 3, i, value++ );
+   for( IndexType i = 0; i < 2; i++ )      // 3rd row
+      m.setElement( 3, i, value++ );
 
-    for( IndexType i = 0; i < 3; i++ )      // 4th row
-        m.setElement( 4, i, value++ );
+   for( IndexType i = 0; i < 3; i++ )      // 4th row
+      m.setElement( 4, i, value++ );
 
-    for( IndexType i = 0; i < 4; i++ )      // 5th row
-        m.setElement( 5, i, value++ );
+   for( IndexType i = 0; i < 4; i++ )      // 5th row
+      m.setElement( 5, i, value++ );
 
-    for( IndexType i = 0; i < 5; i++ )      // 6th row
-        m.setElement( 6, i, value++ );
+   for( IndexType i = 0; i < 5; i++ )      // 6th row
+      m.setElement( 6, i, value++ );
 
-    for( IndexType i = 0; i < 6; i++ )      // 7th row
-        m.setElement( 7, i, value++ );
+   for( IndexType i = 0; i < 6; i++ )      // 7th row
+      m.setElement( 7, i, value++ );
 
-    for( IndexType i = 0; i < 7; i++ )      // 8th row
-        m.setElement( 8, i, value++ );
+   for( IndexType i = 0; i < 7; i++ )      // 8th row
+      m.setElement( 8, i, value++ );
 
-    for( IndexType i = 0; i < 8; i++ )      // 9th row
-        m.setElement( 9, i, value++ );
+   for( IndexType i = 0; i < 8; i++ )      // 9th row
+      m.setElement( 9, i, value++ );
 
    rowLengths = 0;
    m.getCompressedRowLengths( rowLengths );
@@ -122,26 +116,20 @@ void test_SetCompressedRowLengths()
 template< typename Matrix1, typename Matrix2 >
 void test_SetLike()
 {
-    using RealType = typename Matrix1::RealType;
-    using DeviceType = typename Matrix1::DeviceType;
-    using IndexType = typename Matrix1::IndexType;
-
-    const IndexType rows = 8;
-    const IndexType cols = 7;
-
-    Matrix1 m1;
-    m1.reset();
-    m1.setDimensions( rows + 1, cols + 2 );
+   using RealType = typename Matrix1::RealType;
+   using DeviceType = typename Matrix1::DeviceType;
+   using IndexType = typename Matrix1::IndexType;
 
-    Matrix2 m2;
-    m2.reset();
-    m2.setDimensions( rows, cols );
+   const IndexType rows = 8;
+   const IndexType cols = 7;
 
-    m1.setLike( m2 );
+   Matrix1 m1( rows + 1, cols + 2 );
+   Matrix2 m2( rows, cols );
 
+   m1.setLike( m2 );
 
-    EXPECT_EQ( m1.getRows(), m2.getRows() );
-    EXPECT_EQ( m1.getColumns(), m2.getColumns() );
+   EXPECT_EQ( m1.getRows(), m2.getRows() );
+   EXPECT_EQ( m1.getColumns(), m2.getColumns() );
 }
 
 template< typename Matrix >
@@ -169,23 +157,9 @@ void test_GetNumberOfNonzeroMatrixElements()
    const IndexType rows = 10;
    const IndexType cols = 10;
 
-   Matrix m;
-   m.reset();
+   Matrix m( rows, cols );
 
-   m.setDimensions( rows, cols );
-
-   typename Matrix::CompressedRowLengthsVector rowLengths;
-   rowLengths.setSize( rows );
-   rowLengths.setElement( 0, 4 );
-   rowLengths.setElement( 1, 3 );
-   rowLengths.setElement( 2, 8 );
-   rowLengths.setElement( 3, 2 );
-   for( IndexType i = 4; i < rows - 2; i++ )
-   {
-      rowLengths.setElement( i, 1 );
-   }
-   rowLengths.setElement( 8, 10 );
-   rowLengths.setElement( 9, 10 );
+   typename Matrix::CompressedRowLengthsVector rowLengths{ 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 };
    m.setCompressedRowLengths( rowLengths );
 
    RealType value = 1;
@@ -205,10 +179,8 @@ void test_GetNumberOfNonzeroMatrixElements()
       m.setElement( i, 0, value++ );
 
    for( IndexType j = 8; j < rows; j++)
-   {
       for( IndexType i = 0; i < cols; i++ )
          m.setElement( j, i, value++ );
-   }
 
    EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 );
 }
@@ -216,81 +188,67 @@ void test_GetNumberOfNonzeroMatrixElements()
 template< typename Matrix >
 void test_Reset()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-
-/*
- * Sets up the following 5x4 sparse matrix:
- *
- *    /  0  0  0  0 \
- *    |  0  0  0  0 |
- *    |  0  0  0  0 |
- *    |  0  0  0  0 |
- *    \  0  0  0  0 /
- */
-
-    const IndexType rows = 5;
-    const IndexType cols = 4;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    Matrix m;
-    m.setDimensions( rows, cols );
+   /*
+    * Sets up the following 5x4 sparse matrix:
+    *
+    *    /  0  0  0  0 \
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    \  0  0  0  0 /
+    */
 
-    m.reset();
+   const IndexType rows = 5;
+   const IndexType cols = 4;
 
+   Matrix m( rows, cols );
+   m.reset();
 
-    EXPECT_EQ( m.getRows(), 0 );
-    EXPECT_EQ( m.getColumns(), 0 );
+   EXPECT_EQ( m.getRows(), 0 );
+   EXPECT_EQ( m.getColumns(), 0 );
 }
 
 template< typename Matrix >
 void test_GetRow()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-
-/*
- * Sets up the following 10x10 sparse matrix:
- *
- *    /  1  0  2  0  3  0  4  0  0  0  \
- *    |  5  6  7  0  0  0  0  0  0  0  |
- *    |  8  9 10 11 12 13 14 15  0  0  |
- *    | 16 17  0  0  0  0  0  0  0  0  |
- *    | 18  0  0  0  0  0  0  0  0  0  |
- *    | 19  0  0  0  0  0  0  0  0  0  |
- *    | 20  0  0  0  0  0  0  0  0  0  |
- *    | 21  0  0  0  0  0  0  0  0  0  |
- *    | 22 23 24 25 26 27 28 29 30 31  |
- *    \ 32 33 34 35 36 37 38 39 40 41 /
- */
-
-    const IndexType rows = 10;
-    const IndexType cols = 10;
-
-    Matrix m( rows, cols );
-
-    typename Matrix::CompressedRowLengthsVector rowLengths;
-    rowLengths.setSize( rows );
-    rowLengths.setElement( 0, 4 );
-    rowLengths.setElement( 1, 3 );
-    rowLengths.setElement( 2, 8 );
-    rowLengths.setElement( 3, 2 );
-    for( IndexType i = 4; i < rows - 2; i++ )
-    {
-        rowLengths.setElement( i, 1 );
-    }
-    rowLengths.setElement( 8, 10 );
-    rowLengths.setElement( 9, 10 );
-    m.setCompressedRowLengths( rowLengths );
-
-    auto matrixView = m.getView();
-    auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
-       auto row = matrixView.getRow( rowIdx );
-       RealType val;
-       switch( rowIdx )
-       {
-          case 0:
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  2  0  3  0  4  0  0  0  \
+    *    |  5  6  7  0  0  0  0  0  0  0  |
+    *    |  8  9 10 11 12 13 14 15  0  0  |
+    *    | 16 17  0  0  0  0  0  0  0  0  |
+    *    | 18  0  0  0  0  0  0  0  0  0  |
+    *    | 19  0  0  0  0  0  0  0  0  0  |
+    *    | 20  0  0  0  0  0  0  0  0  0  |
+    *    | 21  0  0  0  0  0  0  0  0  0  |
+    *    | 22 23 24 25 26 27 28 29 30 31  |
+    *    \ 32 33 34 35 36 37 38 39 40 41 /
+    */
+
+   const IndexType rows = 10;
+   const IndexType cols = 10;
+
+   Matrix m( rows, cols );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths{ 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 };
+   m.setCompressedRowLengths( rowLengths );
+
+   auto matrixView = m.getView();
+   auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
+      auto row = matrixView.getRow( rowIdx );
+      RealType val;
+      switch( rowIdx )
+      {
+         case 0:
             val = 1;
             for( IndexType i = 0; i < 4; i++ )
                row.setElement( i, 2 * i, val++ );
@@ -323,716 +281,670 @@ void test_GetRow()
             row.setElement( 0, 0, 21 );
             break;
          case 8:
-             val = 22;
-             for( IndexType i = 0; i < rows; i++ )
-                row.setElement( i, i, val++ );
-             break;
+            val = 22;
+            for( IndexType i = 0; i < rows; i++ )
+               row.setElement( i, i, val++ );
+            break;
          case 9:
-             val = 32;
-             for( IndexType i = 0; i < rows; i++ )
-                row.setElement( i, i, val++ );
-             break;
-       }
-    };
-    TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f );
-
-    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
-    EXPECT_EQ( m.getElement( 0, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 2 ),  2 );
-    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 4 ),  3 );
-    EXPECT_EQ( m.getElement( 0, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 6 ),  4 );
-    EXPECT_EQ( m.getElement( 0, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ),  5 );
-    EXPECT_EQ( m.getElement( 1, 1 ),  6 );
-    EXPECT_EQ( m.getElement( 1, 2 ),  7 );
-    EXPECT_EQ( m.getElement( 1, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 6 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ),  8 );
-    EXPECT_EQ( m.getElement( 2, 1 ),  9 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 10 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 11 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 12 );
-    EXPECT_EQ( m.getElement( 2, 5 ), 13 );
-    EXPECT_EQ( m.getElement( 2, 6 ), 14 );
-    EXPECT_EQ( m.getElement( 2, 7 ), 15 );
-    EXPECT_EQ( m.getElement( 2, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 2, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ), 16 );
-    EXPECT_EQ( m.getElement( 3, 1 ), 17 );
-    EXPECT_EQ( m.getElement( 3, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 6 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ), 18 );
-    EXPECT_EQ( m.getElement( 4, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 6 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 5, 0 ), 19 );
-    EXPECT_EQ( m.getElement( 5, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 6 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 6, 0 ), 20 );
-    EXPECT_EQ( m.getElement( 6, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 6 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 7, 0 ), 21 );
-    EXPECT_EQ( m.getElement( 7, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 6 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 8, 0 ), 22 );
-    EXPECT_EQ( m.getElement( 8, 1 ), 23 );
-    EXPECT_EQ( m.getElement( 8, 2 ), 24 );
-    EXPECT_EQ( m.getElement( 8, 3 ), 25 );
-    EXPECT_EQ( m.getElement( 8, 4 ), 26 );
-    EXPECT_EQ( m.getElement( 8, 5 ), 27 );
-    EXPECT_EQ( m.getElement( 8, 6 ), 28 );
-    EXPECT_EQ( m.getElement( 8, 7 ), 29 );
-    EXPECT_EQ( m.getElement( 8, 8 ), 30 );
-    EXPECT_EQ( m.getElement( 8, 9 ), 31 );
-
-    EXPECT_EQ( m.getElement( 9, 0 ), 32 );
-    EXPECT_EQ( m.getElement( 9, 1 ), 33 );
-    EXPECT_EQ( m.getElement( 9, 2 ), 34 );
-    EXPECT_EQ( m.getElement( 9, 3 ), 35 );
-    EXPECT_EQ( m.getElement( 9, 4 ), 36 );
-    EXPECT_EQ( m.getElement( 9, 5 ), 37 );
-    EXPECT_EQ( m.getElement( 9, 6 ), 38 );
-    EXPECT_EQ( m.getElement( 9, 7 ), 39 );
-    EXPECT_EQ( m.getElement( 9, 8 ), 40 );
-    EXPECT_EQ( m.getElement( 9, 9 ), 41 );
+            val = 32;
+            for( IndexType i = 0; i < rows; i++ )
+               row.setElement( i, i, val++ );
+            break;
+      }
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 6 ),  4 );
+   EXPECT_EQ( m.getElement( 0, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  9 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 11 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 7 ), 15 );
+   EXPECT_EQ( m.getElement( 2, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 18 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 19 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 20 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 7, 0 ), 21 );
+   EXPECT_EQ( m.getElement( 7, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 8, 0 ), 22 );
+   EXPECT_EQ( m.getElement( 8, 1 ), 23 );
+   EXPECT_EQ( m.getElement( 8, 2 ), 24 );
+   EXPECT_EQ( m.getElement( 8, 3 ), 25 );
+   EXPECT_EQ( m.getElement( 8, 4 ), 26 );
+   EXPECT_EQ( m.getElement( 8, 5 ), 27 );
+   EXPECT_EQ( m.getElement( 8, 6 ), 28 );
+   EXPECT_EQ( m.getElement( 8, 7 ), 29 );
+   EXPECT_EQ( m.getElement( 8, 8 ), 30 );
+   EXPECT_EQ( m.getElement( 8, 9 ), 31 );
+
+   EXPECT_EQ( m.getElement( 9, 0 ), 32 );
+   EXPECT_EQ( m.getElement( 9, 1 ), 33 );
+   EXPECT_EQ( m.getElement( 9, 2 ), 34 );
+   EXPECT_EQ( m.getElement( 9, 3 ), 35 );
+   EXPECT_EQ( m.getElement( 9, 4 ), 36 );
+   EXPECT_EQ( m.getElement( 9, 5 ), 37 );
+   EXPECT_EQ( m.getElement( 9, 6 ), 38 );
+   EXPECT_EQ( m.getElement( 9, 7 ), 39 );
+   EXPECT_EQ( m.getElement( 9, 8 ), 40 );
+   EXPECT_EQ( m.getElement( 9, 9 ), 41 );
 }
 
 
 template< typename Matrix >
 void test_SetElement()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-
-/*
- * Sets up the following 10x10 sparse matrix:
- *
- *    /  1  0  2  0  3  0  4  0  0  0  \
- *    |  5  6  7  0  0  0  0  0  0  0  |
- *    |  8  9 10 11 12 13 14 15  0  0  |
- *    | 16 17  0  0  0  0  0  0  0  0  |
- *    | 18  0  0  0  0  0  0  0  0  0  |
- *    | 19  0  0  0  0  0  0  0  0  0  |
- *    | 20  0  0  0  0  0  0  0  0  0  |
- *    | 21  0  0  0  0  0  0  0  0  0  |
- *    | 22 23 24 25 26 27 28 29 30 31  |
- *    \ 32 33 34 35 36 37 38 39 40 41 /
- */
-
-    const IndexType rows = 10;
-    const IndexType cols = 10;
-
-    Matrix m;
-    m.reset();
-
-    m.setDimensions( rows, cols );
-
-    typename Matrix::CompressedRowLengthsVector rowLengths;
-    rowLengths.setSize( rows );
-    rowLengths.setElement( 0, 4 );
-    rowLengths.setElement( 1, 3 );
-    rowLengths.setElement( 2, 8 );
-    rowLengths.setElement( 3, 2 );
-    for( IndexType i = 4; i < rows - 2; i++ )
-    {
-        rowLengths.setElement( i, 1 );
-    }
-    rowLengths.setElement( 8, 10 );
-    rowLengths.setElement( 9, 10 );
-    m.setCompressedRowLengths( rowLengths );
-
-    RealType value = 1;
-    for( IndexType i = 0; i < 4; i++ )
-        m.setElement( 0, 2 * i, value++ );
-
-    for( IndexType i = 0; i < 3; i++ )
-        m.setElement( 1, i, value++ );
-
-    for( IndexType i = 0; i < 8; i++ )
-        m.setElement( 2, i, value++ );
-
-    for( IndexType i = 0; i < 2; i++ )
-        m.setElement( 3, i, value++ );
-
-    for( IndexType i = 4; i < 8; i++ )
-        m.setElement( i, 0, value++ );
-
-    for( IndexType j = 8; j < rows; j++)
-    {
-        for( IndexType i = 0; i < cols; i++ )
-            m.setElement( j, i, value++ );
-    }
-
-    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
-    EXPECT_EQ( m.getElement( 0, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 2 ),  2 );
-    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 4 ),  3 );
-    EXPECT_EQ( m.getElement( 0, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 6 ),  4 );
-    EXPECT_EQ( m.getElement( 0, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ),  5 );
-    EXPECT_EQ( m.getElement( 1, 1 ),  6 );
-    EXPECT_EQ( m.getElement( 1, 2 ),  7 );
-    EXPECT_EQ( m.getElement( 1, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 6 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ),  8 );
-    EXPECT_EQ( m.getElement( 2, 1 ),  9 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 10 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 11 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 12 );
-    EXPECT_EQ( m.getElement( 2, 5 ), 13 );
-    EXPECT_EQ( m.getElement( 2, 6 ), 14 );
-    EXPECT_EQ( m.getElement( 2, 7 ), 15 );
-    EXPECT_EQ( m.getElement( 2, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 2, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ), 16 );
-    EXPECT_EQ( m.getElement( 3, 1 ), 17 );
-    EXPECT_EQ( m.getElement( 3, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 6 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ), 18 );
-    EXPECT_EQ( m.getElement( 4, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 6 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 5, 0 ), 19 );
-    EXPECT_EQ( m.getElement( 5, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 6 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 6, 0 ), 20 );
-    EXPECT_EQ( m.getElement( 6, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 6 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 6, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 7, 0 ), 21 );
-    EXPECT_EQ( m.getElement( 7, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 4 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 5 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 6 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 7 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 8 ),  0 );
-    EXPECT_EQ( m.getElement( 7, 9 ),  0 );
-
-    EXPECT_EQ( m.getElement( 8, 0 ), 22 );
-    EXPECT_EQ( m.getElement( 8, 1 ), 23 );
-    EXPECT_EQ( m.getElement( 8, 2 ), 24 );
-    EXPECT_EQ( m.getElement( 8, 3 ), 25 );
-    EXPECT_EQ( m.getElement( 8, 4 ), 26 );
-    EXPECT_EQ( m.getElement( 8, 5 ), 27 );
-    EXPECT_EQ( m.getElement( 8, 6 ), 28 );
-    EXPECT_EQ( m.getElement( 8, 7 ), 29 );
-    EXPECT_EQ( m.getElement( 8, 8 ), 30 );
-    EXPECT_EQ( m.getElement( 8, 9 ), 31 );
-
-    EXPECT_EQ( m.getElement( 9, 0 ), 32 );
-    EXPECT_EQ( m.getElement( 9, 1 ), 33 );
-    EXPECT_EQ( m.getElement( 9, 2 ), 34 );
-    EXPECT_EQ( m.getElement( 9, 3 ), 35 );
-    EXPECT_EQ( m.getElement( 9, 4 ), 36 );
-    EXPECT_EQ( m.getElement( 9, 5 ), 37 );
-    EXPECT_EQ( m.getElement( 9, 6 ), 38 );
-    EXPECT_EQ( m.getElement( 9, 7 ), 39 );
-    EXPECT_EQ( m.getElement( 9, 8 ), 40 );
-    EXPECT_EQ( m.getElement( 9, 9 ), 41 );
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  2  0  3  0  4  0  0  0  \
+    *    |  5  6  7  0  0  0  0  0  0  0  |
+    *    |  8  9 10 11 12 13 14 15  0  0  |
+    *    | 16 17  0  0  0  0  0  0  0  0  |
+    *    | 18  0  0  0  0  0  0  0  0  0  |
+    *    | 19  0  0  0  0  0  0  0  0  0  |
+    *    | 20  0  0  0  0  0  0  0  0  0  |
+    *    | 21  0  0  0  0  0  0  0  0  0  |
+    *    | 22 23 24 25 26 27 28 29 30 31  |
+    *    \ 32 33 34 35 36 37 38 39 40 41 /
+    */
+
+   const IndexType rows = 10;
+   const IndexType cols = 10;
+
+   Matrix m;
+   m.reset();
+
+   m.setDimensions( rows, cols );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths { 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 };
+   m.setCompressedRowLengths( rowLengths );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < 4; i++ )
+      m.setElement( 0, 2 * i, value++ );
+
+   for( IndexType i = 0; i < 3; i++ )
+      m.setElement( 1, i, value++ );
+
+   for( IndexType i = 0; i < 8; i++ )
+      m.setElement( 2, i, value++ );
+
+   for( IndexType i = 0; i < 2; i++ )
+      m.setElement( 3, i, value++ );
+
+   for( IndexType i = 4; i < 8; i++ )
+      m.setElement( i, 0, value++ );
+
+   for( IndexType j = 8; j < rows; j++)
+      for( IndexType i = 0; i < cols; i++ )
+         m.setElement( j, i, value++ );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 6 ),  4 );
+   EXPECT_EQ( m.getElement( 0, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  9 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 11 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 7 ), 15 );
+   EXPECT_EQ( m.getElement( 2, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 18 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 19 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 20 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 7, 0 ), 21 );
+   EXPECT_EQ( m.getElement( 7, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 8, 0 ), 22 );
+   EXPECT_EQ( m.getElement( 8, 1 ), 23 );
+   EXPECT_EQ( m.getElement( 8, 2 ), 24 );
+   EXPECT_EQ( m.getElement( 8, 3 ), 25 );
+   EXPECT_EQ( m.getElement( 8, 4 ), 26 );
+   EXPECT_EQ( m.getElement( 8, 5 ), 27 );
+   EXPECT_EQ( m.getElement( 8, 6 ), 28 );
+   EXPECT_EQ( m.getElement( 8, 7 ), 29 );
+   EXPECT_EQ( m.getElement( 8, 8 ), 30 );
+   EXPECT_EQ( m.getElement( 8, 9 ), 31 );
+
+   EXPECT_EQ( m.getElement( 9, 0 ), 32 );
+   EXPECT_EQ( m.getElement( 9, 1 ), 33 );
+   EXPECT_EQ( m.getElement( 9, 2 ), 34 );
+   EXPECT_EQ( m.getElement( 9, 3 ), 35 );
+   EXPECT_EQ( m.getElement( 9, 4 ), 36 );
+   EXPECT_EQ( m.getElement( 9, 5 ), 37 );
+   EXPECT_EQ( m.getElement( 9, 6 ), 38 );
+   EXPECT_EQ( m.getElement( 9, 7 ), 39 );
+   EXPECT_EQ( m.getElement( 9, 8 ), 40 );
+   EXPECT_EQ( m.getElement( 9, 9 ), 41 );
 }
 
 template< typename Matrix >
 void test_AddElement()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-
-/*
- * Sets up the following 6x5 sparse matrix:
- *
- *    /  1  2  3  0  0 \
- *    |  0  4  5  6  0 |
- *    |  0  0  7  8  9 |
- *    | 10  0  0  0  0 |
- *    |  0 11  0  0  0 |
- *    \  0  0  0 12  0 /
- */
-
-    const IndexType rows = 6;
-    const IndexType cols = 5;
-
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
-    typename Matrix::CompressedRowLengthsVector rowLengths;
-    rowLengths.setSize( rows );
-    rowLengths.setValue( 3 );
-    m.setCompressedRowLengths( rowLengths );
-
-    RealType value = 1;
-    for( IndexType i = 0; i < cols - 2; i++ )     // 0th row
-        m.setElement( 0, i, value++ );
-
-    for( IndexType i = 1; i < cols - 1; i++ )     // 1st row
-        m.setElement( 1, i, value++ );
-
-    for( IndexType i = 2; i < cols; i++ )         // 2nd row
-        m.setElement( 2, i, value++ );
-
-    m.setElement( 3, 0, value++ );      // 3rd row
-
-    m.setElement( 4, 1, value++ );      // 4th row
-
-    m.setElement( 5, 3, value++ );      // 5th row
-
-
-    // Check the set elements
-    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
-    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
-    EXPECT_EQ( m.getElement( 0, 2 ),  3 );
-    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 4 ),  0 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 1 ),  4 );
-    EXPECT_EQ( m.getElement( 1, 2 ),  5 );
-    EXPECT_EQ( m.getElement( 1, 3 ),  6 );
-    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ),  0 );
-    EXPECT_EQ( m.getElement( 2, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 2, 2 ),  7 );
-    EXPECT_EQ( m.getElement( 2, 3 ),  8 );
-    EXPECT_EQ( m.getElement( 2, 4 ),  9 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ), 10 );
-    EXPECT_EQ( m.getElement( 3, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 4 ),  0 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 11 );
-    EXPECT_EQ( m.getElement( 4, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 4 ),  0 );
-
-    EXPECT_EQ( m.getElement( 5, 0 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 2 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 3 ), 12 );
-    EXPECT_EQ( m.getElement( 5, 4 ),  0 );
-
-    // Add new elements to the old elements with a multiplying factor applied to the old elements.
-
-/*
- * Sets up the following 6x5 sparse matrix:
- *
- *    /  1  2  3  0  0 \
- *    |  0  4  5  6  0 |
- *    |  0  0  7  8  9 |
- *    | 10  0  0  0  0 |
- *    |  0 11  0  0  0 |
- *    \  0  0  0 12  0 /
- */
-
-/*
- * The following setup results in the following 6x5 sparse matrix:
- *
- *    /  3  6  9  0  0 \
- *    |  0 12 15 18  0 |
- *    |  0  0 21 24 27 |
- *    | 30 11 12  0  0 |
- *    |  0 35 14 15  0 |
- *    \  0  0 16 41 18 /
- */
-
-    RealType newValue = 1;
-    for( IndexType i = 0; i < cols - 2; i++ )         // 0th row
-        m.addElement( 0, i, newValue++, 2.0 );
-
-    for( IndexType i = 1; i < cols - 1; i++ )         // 1st row
-        m.addElement( 1, i, newValue++, 2.0 );
-
-    for( IndexType i = 2; i < cols; i++ )             // 2nd row
-        m.addElement( 2, i, newValue++, 2.0 );
-
-    for( IndexType i = 0; i < cols - 2; i++ )         // 3rd row
-        m.addElement( 3, i, newValue++, 2.0 );
-
-    for( IndexType i = 1; i < cols - 1; i++ )         // 4th row
-        m.addElement( 4, i, newValue++, 2.0 );
-
-    for( IndexType i = 2; i < cols; i++ )             // 5th row
-        m.addElement( 5, i, newValue++, 2.0 );
-
-
-    EXPECT_EQ( m.getElement( 0, 0 ),  3 );
-    EXPECT_EQ( m.getElement( 0, 1 ),  6 );
-    EXPECT_EQ( m.getElement( 0, 2 ),  9 );
-    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 0, 4 ),  0 );
-
-    EXPECT_EQ( m.getElement( 1, 0 ),  0 );
-    EXPECT_EQ( m.getElement( 1, 1 ), 12 );
-    EXPECT_EQ( m.getElement( 1, 2 ), 15 );
-    EXPECT_EQ( m.getElement( 1, 3 ), 18 );
-    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
-
-    EXPECT_EQ( m.getElement( 2, 0 ),  0 );
-    EXPECT_EQ( m.getElement( 2, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 21 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 24 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 27 );
-
-    EXPECT_EQ( m.getElement( 3, 0 ), 30 );
-    EXPECT_EQ( m.getElement( 3, 1 ), 11 );
-    EXPECT_EQ( m.getElement( 3, 2 ), 12 );
-    EXPECT_EQ( m.getElement( 3, 3 ),  0 );
-    EXPECT_EQ( m.getElement( 3, 4 ),  0 );
-
-    EXPECT_EQ( m.getElement( 4, 0 ),  0 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 35 );
-    EXPECT_EQ( m.getElement( 4, 2 ), 14 );
-    EXPECT_EQ( m.getElement( 4, 3 ), 15 );
-    EXPECT_EQ( m.getElement( 4, 4 ),  0 );
-
-    EXPECT_EQ( m.getElement( 5, 0 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 1 ),  0 );
-    EXPECT_EQ( m.getElement( 5, 2 ), 16 );
-    EXPECT_EQ( m.getElement( 5, 3 ), 41 );
-    EXPECT_EQ( m.getElement( 5, 4 ), 18 );
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 6x5 sparse matrix:
+    *
+    *    /  1  2  3  0  0 \
+    *    |  0  4  5  6  0 |
+    *    |  0  0  7  8  9 |
+    *    | 10  0  0  0  0 |
+    *    |  0 11  0  0  0 |
+    *    \  0  0  0 12  0 /
+    */
+
+   const IndexType rows = 6;
+   const IndexType cols = 5;
+
+   Matrix m( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( rows );
+   rowLengths = 3;
+   m.setCompressedRowLengths( rowLengths );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < cols - 2; i++ )     // 0th row
+      m.setElement( 0, i, value++ );
+
+   for( IndexType i = 1; i < cols - 1; i++ )     // 1st row
+      m.setElement( 1, i, value++ );
+
+   for( IndexType i = 2; i < cols; i++ )         // 2nd row
+      m.setElement( 2, i, value++ );
+
+   m.setElement( 3, 0, value++ );      // 3rd row
+
+   m.setElement( 4, 1, value++ );      // 4th row
+
+   m.setElement( 5, 3, value++ );      // 5th row
+
+
+   // Check the set elements
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  9 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 11 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 12 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+
+   // Add new elements to the old elements with a multiplying factor applied to the old elements.
+   /*
+    * The following setup results in the following 6x5 sparse matrix:
+    *
+    *    /  3  6  9  0  0 \
+    *    |  0 12 15 18  0 |
+    *    |  0  0 21 24 27 |
+    *    | 30 11 12  0  0 |
+    *    |  0 35 14 15  0 |
+    *    \  0  0 16 41 18 /
+    */
+
+   RealType newValue = 1;
+   for( IndexType i = 0; i < cols - 2; i++ )         // 0th row
+      m.addElement( 0, i, newValue++, 2.0 );
+
+   for( IndexType i = 1; i < cols - 1; i++ )         // 1st row
+      m.addElement( 1, i, newValue++, 2.0 );
+
+   for( IndexType i = 2; i < cols; i++ )             // 2nd row
+      m.addElement( 2, i, newValue++, 2.0 );
+
+   for( IndexType i = 0; i < cols - 2; i++ )         // 3rd row
+      m.addElement( 3, i, newValue++, 2.0 );
+
+   for( IndexType i = 1; i < cols - 1; i++ )         // 4th row
+      m.addElement( 4, i, newValue++, 2.0 );
+
+   for( IndexType i = 2; i < cols; i++ )             // 5th row
+      m.addElement( 5, i, newValue++, 2.0 );
+
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  9 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 15 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 18 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 21 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 27 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 30 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 11 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 12 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 35 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 14 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 15 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 16 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 41 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 18 );
 }
 
 template< typename Matrix >
 void test_VectorProduct()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-    using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
 
-/*
- * Sets up the following 4x4 sparse matrix:
- *
- *    /  1  0  0  0 \
- *    |  0  2  0  3 |
- *    |  0  4  0  0 |
- *    \  0  0  5  0 /
- */
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  0  0  0 \
+    *    |  0  2  0  3 |
+    *    |  0  4  0  0 |
+    *    \  0  0  5  0 /
+    */
 
-    const IndexType m_rows_1 = 4;
-    const IndexType m_cols_1 = 4;
+   const IndexType m_rows_1 = 4;
+   const IndexType m_cols_1 = 4;
 
-    Matrix m_1;
-    m_1.reset();
-    m_1.setDimensions( m_rows_1, m_cols_1 );
-    typename Matrix::CompressedRowLengthsVector rowLengths_1;
-    rowLengths_1.setSize( m_rows_1 );
-    rowLengths_1.setElement( 0, 1 );
-    rowLengths_1.setElement( 1, 2 );
-    rowLengths_1.setElement( 2, 1 );
-    rowLengths_1.setElement( 3, 1 );
-    m_1.setCompressedRowLengths( rowLengths_1 );
-
-    RealType value_1 = 1;
-    m_1.setElement( 0, 0, value_1++ );      // 0th row
-
-    m_1.setElement( 1, 1, value_1++ );      // 1st row
-    m_1.setElement( 1, 3, value_1++ );
-
-    m_1.setElement( 2, 1, value_1++ );      // 2nd row
-
-    m_1.setElement( 3, 2, value_1++ );      // 3rd row
-
-    VectorType inVector_1;
-    inVector_1.setSize( m_cols_1 );
-    for( IndexType i = 0; i < inVector_1.getSize(); i++ )
-        inVector_1.setElement( i, 2 );
+   Matrix m_1;
+   m_1.reset();
+   m_1.setDimensions( m_rows_1, m_cols_1 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_1;
+   rowLengths_1.setSize( m_rows_1 );
+   rowLengths_1.setElement( 0, 1 );
+   rowLengths_1.setElement( 1, 2 );
+   rowLengths_1.setElement( 2, 1 );
+   rowLengths_1.setElement( 3, 1 );
+   m_1.setCompressedRowLengths( rowLengths_1 );
 
-    VectorType outVector_1;
-    outVector_1.setSize( m_rows_1 );
-    for( IndexType j = 0; j < outVector_1.getSize(); j++ )
-        outVector_1.setElement( j, 0 );
-
-
-    m_1.vectorProduct( inVector_1, outVector_1 );
+   RealType value_1 = 1;
+   m_1.setElement( 0, 0, value_1++ );      // 0th row
 
+   m_1.setElement( 1, 1, value_1++ );      // 1st row
+   m_1.setElement( 1, 3, value_1++ );
 
-    EXPECT_EQ( outVector_1.getElement( 0 ),  2 );
-    EXPECT_EQ( outVector_1.getElement( 1 ), 10 );
-    EXPECT_EQ( outVector_1.getElement( 2 ),  8 );
-    EXPECT_EQ( outVector_1.getElement( 3 ), 10 );
-
+   m_1.setElement( 2, 1, value_1++ );      // 2nd row
 
-/*
- * Sets up the following 4x4 sparse matrix:
- *
- *    /  1  2  3  0 \
- *    |  0  0  0  4 |
- *    |  5  6  7  0 |
- *    \  0  8  0  0 /
- */
+   m_1.setElement( 3, 2, value_1++ );      // 3rd row
 
-    const IndexType m_rows_2 = 4;
-    const IndexType m_cols_2 = 4;
+   VectorType inVector_1;
+   inVector_1.setSize( m_cols_1 );
+   for( IndexType i = 0; i < inVector_1.getSize(); i++ )
+       inVector_1.setElement( i, 2 );
 
-    Matrix m_2;
-    m_2.reset();
-    m_2.setDimensions( m_rows_2, m_cols_2 );
-    typename Matrix::CompressedRowLengthsVector rowLengths_2;
-    rowLengths_2.setSize( m_rows_2 );
-    rowLengths_2.setValue( 3 );
-    rowLengths_2.setElement( 1, 1 );
-    rowLengths_2.setElement( 3, 1 );
-    m_2.setCompressedRowLengths( rowLengths_2 );
-
-    RealType value_2 = 1;
-    for( IndexType i = 0; i < 3; i++ )   // 0th row
-        m_2.setElement( 0, i, value_2++ );
-
-    m_2.setElement( 1, 3, value_2++ );      // 1st row
-
-    for( IndexType i = 0; i < 3; i++ )   // 2nd row
-        m_2.setElement( 2, i, value_2++ );
-
-    for( IndexType i = 1; i < 2; i++ )       // 3rd row
-        m_2.setElement( 3, i, value_2++ );
-
-    VectorType inVector_2;
-    inVector_2.setSize( m_cols_2 );
-    for( IndexType i = 0; i < inVector_2.getSize(); i++ )
-        inVector_2.setElement( i, 2 );
-
-    VectorType outVector_2;
-    outVector_2.setSize( m_rows_2 );
-    for( IndexType j = 0; j < outVector_2.getSize(); j++ )
-        outVector_2.setElement( j, 0 );
-
-
-    m_2.vectorProduct( inVector_2, outVector_2 );
+   VectorType outVector_1;
+   outVector_1.setSize( m_rows_1 );
+   for( IndexType j = 0; j < outVector_1.getSize(); j++ )
+       outVector_1.setElement( j, 0 );
 
 
-    EXPECT_EQ( outVector_2.getElement( 0 ), 12 );
-    EXPECT_EQ( outVector_2.getElement( 1 ),  8 );
-    EXPECT_EQ( outVector_2.getElement( 2 ), 36 );
-    EXPECT_EQ( outVector_2.getElement( 3 ), 16 );
-
+   m_1.vectorProduct( inVector_1, outVector_1 );
 
-/*
- * Sets up the following 4x4 sparse matrix:
- *
- *    /  1  2  3  0 \
- *    |  0  4  5  6 |
- *    |  7  8  9  0 |
- *    \  0 10 11 12 /
- */
 
-    const IndexType m_rows_3 = 4;
-    const IndexType m_cols_3 = 4;
+   EXPECT_EQ( outVector_1.getElement( 0 ),  2 );
+   EXPECT_EQ( outVector_1.getElement( 1 ), 10 );
+   EXPECT_EQ( outVector_1.getElement( 2 ),  8 );
+   EXPECT_EQ( outVector_1.getElement( 3 ), 10 );
 
-    Matrix m_3;
-    m_3.reset();
-    m_3.setDimensions( m_rows_3, m_cols_3 );
-    typename Matrix::CompressedRowLengthsVector rowLengths_3;
-    rowLengths_3.setSize( m_rows_3 );
-    rowLengths_3.setValue( 3 );
-    m_3.setCompressedRowLengths( rowLengths_3 );
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  0  0  0  4 |
+    *    |  5  6  7  0 |
+    *    \  0  8  0  0 /
+    */
+
+   const IndexType m_rows_2 = 4;
+   const IndexType m_cols_2 = 4;
+
+   Matrix m_2( m_rows_2, m_cols_2 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_2{ 3, 1, 3, 1 };
+   /*rowLengths_2 = 3;
+   rowLengths_2.setElement( 1, 1 );
+   rowLengths_2.setElement( 3, 1 );*/
+   m_2.setCompressedRowLengths( rowLengths_2 );
+
+   RealType value_2 = 1;
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m_2.setElement( 0, i, value_2++ );
+
+   m_2.setElement( 1, 3, value_2++ );      // 1st row
+
+   for( IndexType i = 0; i < 3; i++ )   // 2nd row
+      m_2.setElement( 2, i, value_2++ );
 
-    RealType value_3 = 1;
-    for( IndexType i = 0; i < 3; i++ )          // 0th row
-        m_3.setElement( 0, i, value_3++ );
+   for( IndexType i = 1; i < 2; i++ )       // 3rd row
+      m_2.setElement( 3, i, value_2++ );
 
-    for( IndexType i = 1; i < 4; i++ )
-        m_3.setElement( 1, i, value_3++ );      // 1st row
+   VectorType inVector_2;
+   inVector_2.setSize( m_cols_2 );
+   for( IndexType i = 0; i < inVector_2.getSize(); i++ )
+      inVector_2.setElement( i, 2 );
 
-    for( IndexType i = 0; i < 3; i++ )          // 2nd row
-        m_3.setElement( 2, i, value_3++ );
+   VectorType outVector_2;
+   outVector_2.setSize( m_rows_2 );
+   for( IndexType j = 0; j < outVector_2.getSize(); j++ )
+      outVector_2.setElement( j, 0 );
+
+   m_2.vectorProduct( inVector_2, outVector_2 );
+
+   EXPECT_EQ( outVector_2.getElement( 0 ), 12 );
+   EXPECT_EQ( outVector_2.getElement( 1 ),  8 );
+   EXPECT_EQ( outVector_2.getElement( 2 ), 36 );
+   EXPECT_EQ( outVector_2.getElement( 3 ), 16 );
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  0  4  5  6 |
+    *    |  7  8  9  0 |
+    *    \  0 10 11 12 /
+    */
 
-    for( IndexType i = 1; i < 4; i++ )          // 3rd row
-        m_3.setElement( 3, i, value_3++ );
+   const IndexType m_rows_3 = 4;
+   const IndexType m_cols_3 = 4;
 
-    VectorType inVector_3;
-    inVector_3.setSize( m_cols_3 );
-    for( IndexType i = 0; i < inVector_3.getSize(); i++ )
-        inVector_3.setElement( i, 2 );
+   Matrix m_3( m_rows_3, m_cols_3 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_3{ 3, 3, 3, 3 };
+   m_3.setCompressedRowLengths( rowLengths_3 );
 
-    VectorType outVector_3;
-    outVector_3.setSize( m_rows_3 );
-    for( IndexType j = 0; j < outVector_3.getSize(); j++ )
-        outVector_3.setElement( j, 0 );
+   RealType value_3 = 1;
+   for( IndexType i = 0; i < 3; i++ )          // 0th row
+      m_3.setElement( 0, i, value_3++ );
 
+   for( IndexType i = 1; i < 4; i++ )
+      m_3.setElement( 1, i, value_3++ );      // 1st row
 
-    m_3.vectorProduct( inVector_3, outVector_3 );
+   for( IndexType i = 0; i < 3; i++ )          // 2nd row
+      m_3.setElement( 2, i, value_3++ );
 
+   for( IndexType i = 1; i < 4; i++ )          // 3rd row
+      m_3.setElement( 3, i, value_3++ );
 
-    EXPECT_EQ( outVector_3.getElement( 0 ), 12 );
-    EXPECT_EQ( outVector_3.getElement( 1 ), 30 );
-    EXPECT_EQ( outVector_3.getElement( 2 ), 48 );
-    EXPECT_EQ( outVector_3.getElement( 3 ), 66 );
+   VectorType inVector_3;
+   inVector_3.setSize( m_cols_3 );
+   for( IndexType i = 0; i < inVector_3.getSize(); i++ )
+      inVector_3.setElement( i, 2 );
 
+   VectorType outVector_3;
+   outVector_3.setSize( m_rows_3 );
+   for( IndexType j = 0; j < outVector_3.getSize(); j++ )
+      outVector_3.setElement( j, 0 );
 
-/*
- * Sets up the following 8x8 sparse matrix:
- *
- *    /  1  2  3  0  0  4  0  0 \
- *    |  0  5  6  7  8  0  0  0 |
- *    |  9 10 11 12 13  0  0  0 |
- *    |  0 14 15 16 17  0  0  0 |
- *    |  0  0 18 19 20 21  0  0 |
- *    |  0  0  0 22 23 24 25  0 |
- *    | 26 27 28 29 30  0  0  0 |
- *    \ 31 32 33 34 35  0  0  0 /
- */
+   m_3.vectorProduct( inVector_3, outVector_3 );
 
-    const IndexType m_rows_4 = 8;
-    const IndexType m_cols_4 = 8;
+   EXPECT_EQ( outVector_3.getElement( 0 ), 12 );
+   EXPECT_EQ( outVector_3.getElement( 1 ), 30 );
+   EXPECT_EQ( outVector_3.getElement( 2 ), 48 );
+   EXPECT_EQ( outVector_3.getElement( 3 ), 66 );
 
-    Matrix m_4;
-    m_4.reset();
-    m_4.setDimensions( m_rows_4, m_cols_4 );
-    typename Matrix::CompressedRowLengthsVector rowLengths_4;
-    rowLengths_4.setSize( m_rows_4 );
-    rowLengths_4.setValue( 4 );
-    rowLengths_4.setElement( 2, 5 );
-    rowLengths_4.setElement( 6, 5 );
-    rowLengths_4.setElement( 7, 5 );
-    m_4.setCompressedRowLengths( rowLengths_4 );
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  2  3  0  0  4  0  0 \
+    *    |  0  5  6  7  8  0  0  0 |
+    *    |  9 10 11 12 13  0  0  0 |
+    *    |  0 14 15 16 17  0  0  0 |
+    *    |  0  0 18 19 20 21  0  0 |
+    *    |  0  0  0 22 23 24 25  0 |
+    *    | 26 27 28 29 30  0  0  0 |
+    *    \ 31 32 33 34 35  0  0  0 /
+    */
 
-    RealType value_4 = 1;
-    for( IndexType i = 0; i < 3; i++ )       // 0th row
-        m_4.setElement( 0, i, value_4++ );
+   const IndexType m_rows_4 = 8;
+   const IndexType m_cols_4 = 8;
 
-    m_4.setElement( 0, 5, value_4++ );
+   Matrix m_4( m_rows_4, m_cols_4 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_4{ 4, 4, 5, 4, 4, 4, 5, 5 };
+   /*rowLengths_4.setSize( m_rows_4 );
+   rowLengths_4.setValue( 4 );
+   rowLengths_4.setElement( 2, 5 );
+   rowLengths_4.setElement( 6, 5 );
+   rowLengths_4.setElement( 7, 5 );*/
+   m_4.setCompressedRowLengths( rowLengths_4 );
 
-    for( IndexType i = 1; i < 5; i++ )       // 1st row
-        m_4.setElement( 1, i, value_4++ );
+   RealType value_4 = 1;
+   for( IndexType i = 0; i < 3; i++ )       // 0th row
+      m_4.setElement( 0, i, value_4++ );
 
-    for( IndexType i = 0; i < 5; i++ )       // 2nd row
-        m_4.setElement( 2, i, value_4++ );
+   m_4.setElement( 0, 5, value_4++ );
 
-    for( IndexType i = 1; i < 5; i++ )       // 3rd row
-        m_4.setElement( 3, i, value_4++ );
+   for( IndexType i = 1; i < 5; i++ )       // 1st row
+      m_4.setElement( 1, i, value_4++ );
 
-    for( IndexType i = 2; i < 6; i++ )       // 4th row
-        m_4.setElement( 4, i, value_4++ );
+   for( IndexType i = 0; i < 5; i++ )       // 2nd row
+      m_4.setElement( 2, i, value_4++ );
 
-    for( IndexType i = 3; i < 7; i++ )       // 5th row
-        m_4.setElement( 5, i, value_4++ );
+   for( IndexType i = 1; i < 5; i++ )       // 3rd row
+      m_4.setElement( 3, i, value_4++ );
 
-    for( IndexType i = 0; i < 5; i++ )       // 6th row
-        m_4.setElement( 6, i, value_4++ );
+   for( IndexType i = 2; i < 6; i++ )       // 4th row
+      m_4.setElement( 4, i, value_4++ );
 
-    for( IndexType i = 0; i < 5; i++ )       // 7th row
-        m_4.setElement( 7, i, value_4++ );
+   for( IndexType i = 3; i < 7; i++ )       // 5th row
+      m_4.setElement( 5, i, value_4++ );
 
-    VectorType inVector_4;
-    inVector_4.setSize( m_cols_4 );
-    for( IndexType i = 0; i < inVector_4.getSize(); i++ )
-        inVector_4.setElement( i, 2 );
+   for( IndexType i = 0; i < 5; i++ )       // 6th row
+      m_4.setElement( 6, i, value_4++ );
 
-    VectorType outVector_4;
-    outVector_4.setSize( m_rows_4 );
-    for( IndexType j = 0; j < outVector_4.getSize(); j++ )
-        outVector_4.setElement( j, 0 );
+   for( IndexType i = 0; i < 5; i++ )       // 7th row
+      m_4.setElement( 7, i, value_4++ );
 
+   VectorType inVector_4;
+   inVector_4.setSize( m_cols_4 );
+   for( IndexType i = 0; i < inVector_4.getSize(); i++ )
+      inVector_4.setElement( i, 2 );
 
-    m_4.vectorProduct( inVector_4, outVector_4 );
+   VectorType outVector_4;
+   outVector_4.setSize( m_rows_4 );
+   for( IndexType j = 0; j < outVector_4.getSize(); j++ )
+      outVector_4.setElement( j, 0 );
 
+   m_4.vectorProduct( inVector_4, outVector_4 );
 
-    EXPECT_EQ( outVector_4.getElement( 0 ),  20 );
-    EXPECT_EQ( outVector_4.getElement( 1 ),  52 );
-    EXPECT_EQ( outVector_4.getElement( 2 ), 110 );
-    EXPECT_EQ( outVector_4.getElement( 3 ), 124 );
-    EXPECT_EQ( outVector_4.getElement( 4 ), 156 );
-    EXPECT_EQ( outVector_4.getElement( 5 ), 188 );
-    EXPECT_EQ( outVector_4.getElement( 6 ), 280 );
-    EXPECT_EQ( outVector_4.getElement( 7 ), 330 );
+   EXPECT_EQ( outVector_4.getElement( 0 ),  20 );
+   EXPECT_EQ( outVector_4.getElement( 1 ),  52 );
+   EXPECT_EQ( outVector_4.getElement( 2 ), 110 );
+   EXPECT_EQ( outVector_4.getElement( 3 ), 124 );
+   EXPECT_EQ( outVector_4.getElement( 4 ), 156 );
+   EXPECT_EQ( outVector_4.getElement( 5 ), 188 );
+   EXPECT_EQ( outVector_4.getElement( 6 ), 280 );
+   EXPECT_EQ( outVector_4.getElement( 7 ), 330 );
 
 
    /*
@@ -1048,76 +960,65 @@ void test_VectorProduct()
     *    \ 29 30 31 32 33 34 35 36 /   8
     */
 
-    const IndexType m_rows_5 = 8;
-    const IndexType m_cols_5 = 8;
-
-    Matrix m_5;
-    m_5.reset();
-    m_5.setDimensions( m_rows_5, m_cols_5 );
-    typename Matrix::CompressedRowLengthsVector rowLengths_5;
-    rowLengths_5.setSize( m_rows_5 );
-    rowLengths_5.setElement(0, 6);
-    rowLengths_5.setElement(1, 3);
-    rowLengths_5.setElement(2, 4);
-    rowLengths_5.setElement(3, 5);
-    rowLengths_5.setElement(4, 2);
-    rowLengths_5.setElement(5, 7);
-    rowLengths_5.setElement(6, 8);
-    rowLengths_5.setElement(7, 8);
-    m_5.setCompressedRowLengths( rowLengths_5 );
-
-    RealType value_5 = 1;
-    for( IndexType i = 0; i < 3; i++ )   // 0th row
-        m_5.setElement( 0, i, value_5++ );
-
-    m_5.setElement( 0, 4, value_5++ );           // 0th row
-    m_5.setElement( 0, 5, value_5++ );
+   const IndexType m_rows_5 = 8;
+   const IndexType m_cols_5 = 8;
 
-    m_5.setElement( 1, 1, value_5++ );           // 1st row
-    m_5.setElement( 1, 3, value_5++ );
+   Matrix m_5( m_rows_5, m_cols_5 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_5{ 6, 3, 4, 5, 2, 7, 8, 8 };
+   m_5.setCompressedRowLengths( rowLengths_5 );
 
-    for( IndexType i = 1; i < 3; i++ )            // 2nd row
-        m_5.setElement( 2, i, value_5++ );
-
-    m_5.setElement( 2, 4, value_5++ );           // 2nd row
+   RealType value_5 = 1;
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m_5.setElement( 0, i, value_5++ );
 
-    for( IndexType i = 1; i < 5; i++ )            // 3rd row
-        m_5.setElement( 3, i, value_5++ );
+   m_5.setElement( 0, 4, value_5++ );           // 0th row
+   m_5.setElement( 0, 5, value_5++ );
 
-    m_5.setElement( 4, 1, value_5++ );           // 4th row
+   m_5.setElement( 1, 1, value_5++ );           // 1st row
+   m_5.setElement( 1, 3, value_5++ );
 
-    for( IndexType i = 1; i < 7; i++ )            // 5th row
-        m_5.setElement( 5, i, value_5++ );
+   for( IndexType i = 1; i < 3; i++ )            // 2nd row
+      m_5.setElement( 2, i, value_5++ );
 
-    for( IndexType i = 0; i < 7; i++ )            // 6th row
-        m_5.setElement( 6, i, value_5++ );
+   m_5.setElement( 2, 4, value_5++ );           // 2nd row
 
-    for( IndexType i = 0; i < 8; i++ )            // 7th row
-        m_5.setElement( 7, i, value_5++ );
+   for( IndexType i = 1; i < 5; i++ )            // 3rd row
+      m_5.setElement( 3, i, value_5++ );
 
-    for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
-        m_5.setElement( i, 7, 1);
+   m_5.setElement( 4, 1, value_5++ );           // 4th row
 
-    VectorType inVector_5;
-    inVector_5.setSize( m_cols_5 );
-    for( IndexType i = 0; i < inVector_5.getSize(); i++ )
-        inVector_5.setElement( i, 2 );
+   for( IndexType i = 1; i < 7; i++ )            // 5th row
+      m_5.setElement( 5, i, value_5++ );
 
-    VectorType outVector_5;
-    outVector_5.setSize( m_rows_5 );
-    for( IndexType j = 0; j < outVector_5.getSize(); j++ )
-        outVector_5.setElement( j, 0 );
+   for( IndexType i = 0; i < 7; i++ )            // 6th row
+      m_5.setElement( 6, i, value_5++ );
 
-    m_5.vectorProduct( inVector_5, outVector_5 );
+   for( IndexType i = 0; i < 8; i++ )            // 7th row
+      m_5.setElement( 7, i, value_5++ );
 
-    EXPECT_EQ( outVector_5.getElement( 0 ),  32 );
-    EXPECT_EQ( outVector_5.getElement( 1 ),  28 );
-    EXPECT_EQ( outVector_5.getElement( 2 ),  56 );
-    EXPECT_EQ( outVector_5.getElement( 3 ), 102 );
-    EXPECT_EQ( outVector_5.getElement( 4 ),  32 );
-    EXPECT_EQ( outVector_5.getElement( 5 ), 224 );
-    EXPECT_EQ( outVector_5.getElement( 6 ), 352 );
-    EXPECT_EQ( outVector_5.getElement( 7 ), 520 );
+   for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
+      m_5.setElement( i, 7, 1);
+
+   VectorType inVector_5;
+   inVector_5.setSize( m_cols_5 );
+   for( IndexType i = 0; i < inVector_5.getSize(); i++ )
+       inVector_5.setElement( i, 2 );
+
+   VectorType outVector_5;
+   outVector_5.setSize( m_rows_5 );
+   for( IndexType j = 0; j < outVector_5.getSize(); j++ )
+       outVector_5.setElement( j, 0 );
+
+   m_5.vectorProduct( inVector_5, outVector_5 );
+
+   EXPECT_EQ( outVector_5.getElement( 0 ),  32 );
+   EXPECT_EQ( outVector_5.getElement( 1 ),  28 );
+   EXPECT_EQ( outVector_5.getElement( 2 ),  56 );
+   EXPECT_EQ( outVector_5.getElement( 3 ), 102 );
+   EXPECT_EQ( outVector_5.getElement( 4 ),  32 );
+   EXPECT_EQ( outVector_5.getElement( 5 ), 224 );
+   EXPECT_EQ( outVector_5.getElement( 6 ), 352 );
+   EXPECT_EQ( outVector_5.getElement( 7 ), 520 );
 }
 
 template< typename Matrix >
@@ -1145,48 +1046,39 @@ void test_RowsReduction()
 
    Matrix m;
    m.setDimensions( rows, cols );
-   typename Matrix::RowsCapacitiesType rowsCapacities( rows );
-   //rowLengths.setSize( rows );
-   rowsCapacities.setElement(0, 6);
-   rowsCapacities.setElement(1, 3);
-   rowsCapacities.setElement(2, 4);
-   rowsCapacities.setElement(3, 5);
-   rowsCapacities.setElement(4, 2);
-   rowsCapacities.setElement(5, 7);
-   rowsCapacities.setElement(6, 8);
-   rowsCapacities.setElement(7, 8);
+   typename Matrix::RowsCapacitiesType rowsCapacities{ 6, 3, 4, 5, 2, 7, 8, 8 };
    m.setCompressedRowLengths( rowsCapacities );
 
    RealType value = 1;
    for( IndexType i = 0; i < 3; i++ )   // 0th row
       m.setElement( 0, i, value++ );
 
-   m.setElement( 0, 4, value++ );           // 0th row
+   m.setElement( 0, 4, value++ );       // 0th row
    m.setElement( 0, 5, value++ );
 
-   m.setElement( 1, 1, value++ );           // 1st row
+   m.setElement( 1, 1, value++ );       // 1st row
    m.setElement( 1, 3, value++ );
 
-   for( IndexType i = 1; i < 3; i++ )            // 2nd row
+   for( IndexType i = 1; i < 3; i++ )   // 2nd row
       m.setElement( 2, i, value++ );
 
-   m.setElement( 2, 4, value++ );           // 2nd row
+   m.setElement( 2, 4, value++ );       // 2nd row
 
-   for( IndexType i = 1; i < 5; i++ )            // 3rd row
+   for( IndexType i = 1; i < 5; i++ )   // 3rd row
       m.setElement( 3, i, value++ );
 
-   m.setElement( 4, 1, value++ );           // 4th row
+   m.setElement( 4, 1, value++ );       // 4th row
 
-   for( IndexType i = 1; i < 7; i++ )            // 5th row
+   for( IndexType i = 1; i < 7; i++ )   // 5th row
       m.setElement( 5, i, value++ );
 
-   for( IndexType i = 0; i < 7; i++ )            // 6th row
+   for( IndexType i = 0; i < 7; i++ )   // 6th row
       m.setElement( 6, i, value++ );
 
-   for( IndexType i = 0; i < 8; i++ )            // 7th row
+   for( IndexType i = 0; i < 8; i++ )   // 7th row
        m.setElement( 7, i, value++ );
 
-   for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
+   for( IndexType i = 0; i < 7; i++ )   // 1s at the end of rows
       m.setElement( i, 7, 1);
 
    ////
@@ -1228,343 +1120,74 @@ void test_RowsReduction()
 template< typename Matrix >
 void test_PerformSORIteration()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-
-/*
- * Sets up the following 4x4 sparse matrix:
- *
- *    /  4  1  0  0 \
- *    |  1  4  1  0 |
- *    |  0  1  4  1 |
- *    \  0  0  1  4 /
- */
-
-    const IndexType m_rows = 4;
-    const IndexType m_cols = 4;
-
-    Matrix m;
-    m.reset();
-    m.setDimensions( m_rows, m_cols );
-    typename Matrix::CompressedRowLengthsVector rowLengths;
-    rowLengths.setSize( m_rows );
-    rowLengths.setValue( 3 );
-    m.setCompressedRowLengths( rowLengths );
-
-    m.setElement( 0, 0, 4.0 );        // 0th row
-    m.setElement( 0, 1, 1.0);
-
-    m.setElement( 1, 0, 1.0 );        // 1st row
-    m.setElement( 1, 1, 4.0 );
-    m.setElement( 1, 2, 1.0 );
-
-    m.setElement( 2, 1, 1.0 );        // 2nd row
-    m.setElement( 2, 2, 4.0 );
-    m.setElement( 2, 3, 1.0 );
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    m.setElement( 3, 2, 1.0 );        // 3rd row
-    m.setElement( 3, 3, 4.0 );
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  4  1  0  0 \
+    *    |  1  4  1  0 |
+    *    |  0  1  4  1 |
+    *    \  0  0  1  4 /
+    */
 
-    RealType bVector [ 4 ] = { 1, 1, 1, 1 };
-    RealType xVector [ 4 ] = { 1, 1, 1, 1 };
+   const IndexType m_rows = 4;
+   const IndexType m_cols = 4;
 
-    IndexType row = 0;
-    RealType omega = 1;
+   Matrix m( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows );
+   rowLengths = 3;
+   m.setCompressedRowLengths( rowLengths );
 
+   m.setElement( 0, 0, 4.0 );        // 0th row
+   m.setElement( 0, 1, 1.0);
 
-    m.performSORIteration( bVector, row++, xVector, omega);
+   m.setElement( 1, 0, 1.0 );        // 1st row
+   m.setElement( 1, 1, 4.0 );
+   m.setElement( 1, 2, 1.0 );
 
-    EXPECT_EQ( xVector[ 0 ], 0.0 );
-    EXPECT_EQ( xVector[ 1 ], 1.0 );
-    EXPECT_EQ( xVector[ 2 ], 1.0 );
-    EXPECT_EQ( xVector[ 3 ], 1.0 );
+   m.setElement( 2, 1, 1.0 );        // 2nd row
+   m.setElement( 2, 2, 4.0 );
+   m.setElement( 2, 3, 1.0 );
 
+   m.setElement( 3, 2, 1.0 );        // 3rd row
+   m.setElement( 3, 3, 4.0 );
 
-    m.performSORIteration( bVector, row++, xVector, omega);
+   RealType bVector [ 4 ] = { 1, 1, 1, 1 };
+   RealType xVector [ 4 ] = { 1, 1, 1, 1 };
 
-    EXPECT_EQ( xVector[ 0 ], 0.0 );
-    EXPECT_EQ( xVector[ 1 ], 0.0 );
-    EXPECT_EQ( xVector[ 2 ], 1.0 );
-    EXPECT_EQ( xVector[ 3 ], 1.0 );
+   IndexType row = 0;
+   RealType omega = 1;
 
+   m.performSORIteration( bVector, row++, xVector, omega);
 
-    m.performSORIteration( bVector, row++, xVector, omega);
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 1.0 );
+   EXPECT_EQ( xVector[ 2 ], 1.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
 
-    EXPECT_EQ( xVector[ 0 ], 0.0 );
-    EXPECT_EQ( xVector[ 1 ], 0.0 );
-    EXPECT_EQ( xVector[ 2 ], 0.0 );
-    EXPECT_EQ( xVector[ 3 ], 1.0 );
+   m.performSORIteration( bVector, row++, xVector, omega);
 
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 1.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
 
-    m.performSORIteration( bVector, row++, xVector, omega);
+   m.performSORIteration( bVector, row++, xVector, omega);
 
-    EXPECT_EQ( xVector[ 0 ], 0.0 );
-    EXPECT_EQ( xVector[ 1 ], 0.0 );
-    EXPECT_EQ( xVector[ 2 ], 0.0 );
-    EXPECT_EQ( xVector[ 3 ], 0.25 );
-}
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 0.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
 
-// This test is only for AdEllpack
-template< typename Matrix >
-void test_OperatorEquals()
-{
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
+   m.performSORIteration( bVector, row++, xVector, omega);
 
-   if( std::is_same< DeviceType, TNL::Devices::Cuda >::value )
-       return;
-   else
-   {
-       using AdELL_host = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Host, IndexType >;
-       using AdELL_cuda = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Cuda, IndexType >;
-
-       /*
-        * Sets up the following 8x8 sparse matrix:
-        *
-        *    /  1  2  3  0  4  5  0  1 \   6
-        *    |  0  6  0  7  0  0  0  1 |   3
-        *    |  0  8  9  0 10  0  0  1 |   4
-        *    |  0 11 12 13 14  0  0  1 |   5
-        *    |  0 15  0  0  0  0  0  1 |   2
-        *    |  0 16 17 18 19 20 21  1 |   7
-        *    | 22 23 24 25 26 27 28  1 |   8
-        *    \ 29 30 31 32 33 34 35 36 /   8
-        */
-
-        const IndexType m_rows = 8;
-        const IndexType m_cols = 8;
-
-        AdELL_host m_host;
-
-        m_host.reset();
-        m_host.setDimensions( m_rows, m_cols );
-        typename AdELL_host::CompressedRowLengthsVector rowLengths;
-        rowLengths.setSize( m_rows );
-        rowLengths.setElement(0, 6);
-        rowLengths.setElement(1, 3);
-        rowLengths.setElement(2, 4);
-        rowLengths.setElement(3, 5);
-        rowLengths.setElement(4, 2);
-        rowLengths.setElement(5, 7);
-        rowLengths.setElement(6, 8);
-        rowLengths.setElement(7, 8);
-        m_host.setCompressedRowLengths( rowLengths );
-
-        RealType value = 1;
-        for( IndexType i = 0; i < 3; i++ )   // 0th row
-            m_host.setElement( 0, i, value++ );
-
-        m_host.setElement( 0, 4, value++ );           // 0th row
-        m_host.setElement( 0, 5, value++ );
-
-        m_host.setElement( 1, 1, value++ );           // 1st row
-        m_host.setElement( 1, 3, value++ );
-
-        for( IndexType i = 1; i < 3; i++ )            // 2nd row
-            m_host.setElement( 2, i, value++ );
-
-        m_host.setElement( 2, 4, value++ );           // 2nd row
-
-
-        for( IndexType i = 1; i < 5; i++ )            // 3rd row
-            m_host.setElement( 3, i, value++ );
-
-        m_host.setElement( 4, 1, value++ );           // 4th row
-
-        for( IndexType i = 1; i < 7; i++ )            // 5th row
-            m_host.setElement( 5, i, value++ );
-
-        for( IndexType i = 0; i < 7; i++ )            // 6th row
-            m_host.setElement( 6, i, value++ );
-
-        for( IndexType i = 0; i < 8; i++ )            // 7th row
-            m_host.setElement( 7, i, value++ );
-
-        for( IndexType i = 0; i < 7; i++ )            // 1s at the end or rows: 5, 6
-            m_host.setElement( i, 7, 1);
-
-        EXPECT_EQ( m_host.getElement( 0, 0 ),  1 );
-        EXPECT_EQ( m_host.getElement( 0, 1 ),  2 );
-        EXPECT_EQ( m_host.getElement( 0, 2 ),  3 );
-        EXPECT_EQ( m_host.getElement( 0, 3 ),  0 );
-        EXPECT_EQ( m_host.getElement( 0, 4 ),  4 );
-        EXPECT_EQ( m_host.getElement( 0, 5 ),  5 );
-        EXPECT_EQ( m_host.getElement( 0, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 0, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 1, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 1 ),  6 );
-        EXPECT_EQ( m_host.getElement( 1, 2 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 3 ),  7 );
-        EXPECT_EQ( m_host.getElement( 1, 4 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 5 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 2, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 2, 1 ),  8 );
-        EXPECT_EQ( m_host.getElement( 2, 2 ),  9 );
-        EXPECT_EQ( m_host.getElement( 2, 3 ),  0 );
-        EXPECT_EQ( m_host.getElement( 2, 4 ), 10 );
-        EXPECT_EQ( m_host.getElement( 2, 5 ),  0 );
-        EXPECT_EQ( m_host.getElement( 2, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 2, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 3, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 3, 1 ), 11 );
-        EXPECT_EQ( m_host.getElement( 3, 2 ), 12 );
-        EXPECT_EQ( m_host.getElement( 3, 3 ), 13 );
-        EXPECT_EQ( m_host.getElement( 3, 4 ), 14 );
-        EXPECT_EQ( m_host.getElement( 3, 5 ),  0 );
-        EXPECT_EQ( m_host.getElement( 3, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 3, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 4, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 1 ), 15 );
-        EXPECT_EQ( m_host.getElement( 4, 2 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 3 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 4 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 5 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 5, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 5, 1 ), 16 );
-        EXPECT_EQ( m_host.getElement( 5, 2 ), 17 );
-        EXPECT_EQ( m_host.getElement( 5, 3 ), 18 );
-        EXPECT_EQ( m_host.getElement( 5, 4 ), 19 );
-        EXPECT_EQ( m_host.getElement( 5, 5 ), 20 );
-        EXPECT_EQ( m_host.getElement( 5, 6 ), 21 );
-        EXPECT_EQ( m_host.getElement( 5, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 6, 0 ), 22 );
-        EXPECT_EQ( m_host.getElement( 6, 1 ), 23 );
-        EXPECT_EQ( m_host.getElement( 6, 2 ), 24 );
-        EXPECT_EQ( m_host.getElement( 6, 3 ), 25 );
-        EXPECT_EQ( m_host.getElement( 6, 4 ), 26 );
-        EXPECT_EQ( m_host.getElement( 6, 5 ), 27 );
-        EXPECT_EQ( m_host.getElement( 6, 6 ), 28 );
-        EXPECT_EQ( m_host.getElement( 6, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 7, 0 ), 29 );
-        EXPECT_EQ( m_host.getElement( 7, 1 ), 30 );
-        EXPECT_EQ( m_host.getElement( 7, 2 ), 31 );
-        EXPECT_EQ( m_host.getElement( 7, 3 ), 32 );
-        EXPECT_EQ( m_host.getElement( 7, 4 ), 33 );
-        EXPECT_EQ( m_host.getElement( 7, 5 ), 34 );
-        EXPECT_EQ( m_host.getElement( 7, 6 ), 35 );
-        EXPECT_EQ( m_host.getElement( 7, 7 ), 36 );
-
-        AdELL_cuda m_cuda;
-
-        // Copy the host matrix into the cuda matrix
-        m_cuda = m_host;
-
-        // Reset the host matrix
-        m_host.reset();
-
-        // Copy the cuda matrix back into the host matrix
-        m_host = m_cuda;
-
-        // Check the newly created double-copy host matrix
-        EXPECT_EQ( m_host.getElement( 0, 0 ),  1 );
-        EXPECT_EQ( m_host.getElement( 0, 1 ),  2 );
-        EXPECT_EQ( m_host.getElement( 0, 2 ),  3 );
-        EXPECT_EQ( m_host.getElement( 0, 3 ),  0 );
-        EXPECT_EQ( m_host.getElement( 0, 4 ),  4 );
-        EXPECT_EQ( m_host.getElement( 0, 5 ),  5 );
-        EXPECT_EQ( m_host.getElement( 0, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 0, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 1, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 1 ),  6 );
-        EXPECT_EQ( m_host.getElement( 1, 2 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 3 ),  7 );
-        EXPECT_EQ( m_host.getElement( 1, 4 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 5 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 1, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 2, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 2, 1 ),  8 );
-        EXPECT_EQ( m_host.getElement( 2, 2 ),  9 );
-        EXPECT_EQ( m_host.getElement( 2, 3 ),  0 );
-        EXPECT_EQ( m_host.getElement( 2, 4 ), 10 );
-        EXPECT_EQ( m_host.getElement( 2, 5 ),  0 );
-        EXPECT_EQ( m_host.getElement( 2, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 2, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 3, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 3, 1 ), 11 );
-        EXPECT_EQ( m_host.getElement( 3, 2 ), 12 );
-        EXPECT_EQ( m_host.getElement( 3, 3 ), 13 );
-        EXPECT_EQ( m_host.getElement( 3, 4 ), 14 );
-        EXPECT_EQ( m_host.getElement( 3, 5 ),  0 );
-        EXPECT_EQ( m_host.getElement( 3, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 3, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 4, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 1 ), 15 );
-        EXPECT_EQ( m_host.getElement( 4, 2 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 3 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 4 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 5 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 6 ),  0 );
-        EXPECT_EQ( m_host.getElement( 4, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 5, 0 ),  0 );
-        EXPECT_EQ( m_host.getElement( 5, 1 ), 16 );
-        EXPECT_EQ( m_host.getElement( 5, 2 ), 17 );
-        EXPECT_EQ( m_host.getElement( 5, 3 ), 18 );
-        EXPECT_EQ( m_host.getElement( 5, 4 ), 19 );
-        EXPECT_EQ( m_host.getElement( 5, 5 ), 20 );
-        EXPECT_EQ( m_host.getElement( 5, 6 ), 21 );
-        EXPECT_EQ( m_host.getElement( 5, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 6, 0 ), 22 );
-        EXPECT_EQ( m_host.getElement( 6, 1 ), 23 );
-        EXPECT_EQ( m_host.getElement( 6, 2 ), 24 );
-        EXPECT_EQ( m_host.getElement( 6, 3 ), 25 );
-        EXPECT_EQ( m_host.getElement( 6, 4 ), 26 );
-        EXPECT_EQ( m_host.getElement( 6, 5 ), 27 );
-        EXPECT_EQ( m_host.getElement( 6, 6 ), 28 );
-        EXPECT_EQ( m_host.getElement( 6, 7 ),  1 );
-
-        EXPECT_EQ( m_host.getElement( 7, 0 ), 29 );
-        EXPECT_EQ( m_host.getElement( 7, 1 ), 30 );
-        EXPECT_EQ( m_host.getElement( 7, 2 ), 31 );
-        EXPECT_EQ( m_host.getElement( 7, 3 ), 32 );
-        EXPECT_EQ( m_host.getElement( 7, 4 ), 33 );
-        EXPECT_EQ( m_host.getElement( 7, 5 ), 34 );
-        EXPECT_EQ( m_host.getElement( 7, 6 ), 35 );
-        EXPECT_EQ( m_host.getElement( 7, 7 ), 36 );
-
-        // Try vectorProduct with copied cuda matrix to see if it works correctly.
-        using VectorType = TNL::Containers::Vector< RealType, TNL::Devices::Cuda, IndexType >;
-
-        VectorType inVector;
-        inVector.setSize( m_cols );
-        for( IndexType i = 0; i < inVector.getSize(); i++ )
-            inVector.setElement( i, 2 );
-
-        VectorType outVector;
-        outVector.setSize( m_rows );
-        for( IndexType j = 0; j < outVector.getSize(); j++ )
-            outVector.setElement( j, 0 );
-
-        m_cuda.vectorProduct( inVector, outVector );
-
-        EXPECT_EQ( outVector.getElement( 0 ),  32 );
-        EXPECT_EQ( outVector.getElement( 1 ),  28 );
-        EXPECT_EQ( outVector.getElement( 2 ),  56 );
-        EXPECT_EQ( outVector.getElement( 3 ), 102 );
-        EXPECT_EQ( outVector.getElement( 4 ),  32 );
-        EXPECT_EQ( outVector.getElement( 5 ), 224 );
-        EXPECT_EQ( outVector.getElement( 6 ), 352 );
-        EXPECT_EQ( outVector.getElement( 7 ), 520 );
-   }
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 0.0 );
+   EXPECT_EQ( xVector[ 3 ], 0.25 );
 }
 
 template< typename Matrix >
@@ -1583,149 +1206,136 @@ void test_SaveAndLoad( const char* filename )
     *    \  0  9 10 11 /
     */
 
-    const IndexType m_rows = 4;
-    const IndexType m_cols = 4;
-
-    Matrix savedMatrix;
-    savedMatrix.reset();
-    savedMatrix.setDimensions( m_rows, m_cols );
-    typename Matrix::CompressedRowLengthsVector rowLengths;
-    rowLengths.setSize( m_rows );
-    rowLengths.setValue( 3 );
-    savedMatrix.setCompressedRowLengths( rowLengths );
-
-    RealType value = 1;
-    for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
-        savedMatrix.setElement( 0, i, value++ );
+   const IndexType m_rows = 4;
+   const IndexType m_cols = 4;
 
-    savedMatrix.setElement( 1, 1, value++ );
-    savedMatrix.setElement( 1, 3, value++ );      // 1st row
+   Matrix savedMatrix( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows );
+   rowLengths = 3;
+   savedMatrix.setCompressedRowLengths( rowLengths );
 
-    for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
-        savedMatrix.setElement( 2, i, value++ );
+   RealType value = 1;
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
+      savedMatrix.setElement( 0, i, value++ );
 
-    for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
-        savedMatrix.setElement( 3, i, value++ );
+   savedMatrix.setElement( 1, 1, value++ );
+   savedMatrix.setElement( 1, 3, value++ );      // 1st row
 
-    ASSERT_NO_THROW( savedMatrix.save( filename ) );
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
+      savedMatrix.setElement( 2, i, value++ );
 
-    Matrix loadedMatrix;
-    loadedMatrix.reset();
-    loadedMatrix.setDimensions( m_rows, m_cols );
-    typename Matrix::CompressedRowLengthsVector rowLengths2;
-    rowLengths2.setSize( m_rows );
-    rowLengths2.setValue( 3 );
-    loadedMatrix.setCompressedRowLengths( rowLengths2 );
+   for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
+      savedMatrix.setElement( 3, i, value++ );
 
+   ASSERT_NO_THROW( savedMatrix.save( filename ) );
 
-    ASSERT_NO_THROW( loadedMatrix.load( filename ) );
+   Matrix loadedMatrix( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths2( m_rows );
+   rowLengths2 = 3;
+   loadedMatrix.setCompressedRowLengths( rowLengths2 );
 
+   ASSERT_NO_THROW( loadedMatrix.load( filename ) );
 
-    EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
 
-    EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
 
-    EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
 
-    EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
 
-    EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
-    EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
-    EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
-    EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
 
-    EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  0 );
-    EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  4 );
-    EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  0 );
-    EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  5 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  4 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  5 );
 
-    EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  6 );
-    EXPECT_EQ( savedMatrix.getElement( 2, 1 ),  7 );
-    EXPECT_EQ( savedMatrix.getElement( 2, 2 ),  8 );
-    EXPECT_EQ( savedMatrix.getElement( 2, 3 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  6 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ),  7 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ),  8 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ),  0 );
 
-    EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
-    EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  9 );
-    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 );
-    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  9 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 );
 
-    EXPECT_EQ( std::remove( filename ), 0 );
+   EXPECT_EQ( std::remove( filename ), 0 );
 }
 
 template< typename Matrix >
 void test_Print()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-
-/*
- * Sets up the following 5x4 sparse matrix:
- *
- *    /  1  2  3  0 \
- *    |  0  0  0  4 |
- *    |  5  6  7  0 |
- *    |  0  8  9 10 |
- *    \  0  0 11 12 /
- */
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    const IndexType m_rows = 5;
-    const IndexType m_cols = 4;
+   /*
+    * Sets up the following 5x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  0  0  0  4 |
+    *    |  5  6  7  0 |
+    *    |  0  8  9 10 |
+    *    \  0  0 11 12 /
+    */
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( m_rows, m_cols );
-    typename Matrix::CompressedRowLengthsVector rowLengths;
-    rowLengths.setSize( m_rows );
-    rowLengths.setValue( 3 );
-    m.setCompressedRowLengths( rowLengths );
+   const IndexType m_rows = 5;
+   const IndexType m_cols = 4;
 
-    RealType value = 1;
-    for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
-        m.setElement( 0, i, value++ );
+   Matrix m( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows );
+   rowLengths = 3;
+   m.setCompressedRowLengths( rowLengths );
 
-    m.setElement( 1, 3, value++ );      // 1st row
+   RealType value = 1;
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
+      m.setElement( 0, i, value++ );
 
-    for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
-        m.setElement( 2, i, value++ );
+   m.setElement( 1, 3, value++ );                // 1st row
 
-    for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
-        m.setElement( 3, i, value++ );
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
+      m.setElement( 2, i, value++ );
 
-    for( IndexType i = 2; i < m_cols; i++ )       // 4th row
-        m.setElement( 4, i, value++ );
+   for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
+      m.setElement( 3, i, value++ );
 
-    #include <sstream>
-    std::stringstream printed;
-    std::stringstream couted;
+   for( IndexType i = 2; i < m_cols; i++ )       // 4th row
+      m.setElement( 4, i, value++ );
 
-    //change the underlying buffer and save the old buffer
-    auto old_buf = std::cout.rdbuf(printed.rdbuf());
+   std::stringstream printed;
+   std::stringstream couted;
 
-    m.print( std::cout ); //all the std::cout goes to ss
+   //change the underlying buffer and save the old buffer
+   auto old_buf = std::cout.rdbuf(printed.rdbuf());
 
-    std::cout.rdbuf(old_buf); //reset
+   m.print( std::cout ); //all the std::cout goes to ss
 
-    couted << "Row: 0 ->  Col:0->1	 Col:1->2	 Col:2->3\t\n"
-               "Row: 1 ->  Col:3->4\t\n"
-               "Row: 2 ->  Col:0->5	 Col:1->6	 Col:2->7\t\n"
-               "Row: 3 ->  Col:1->8	 Col:2->9	 Col:3->10\t\n"
-               "Row: 4 ->  Col:2->11	 Col:3->12\t\n";
+   std::cout.rdbuf(old_buf); //reset
 
+   couted << "Row: 0 ->  Col:0->1	 Col:1->2	 Col:2->3\t\n"
+             "Row: 1 ->  Col:3->4\t\n"
+             "Row: 2 ->  Col:0->5	 Col:1->6	 Col:2->7\t\n"
+             "Row: 3 ->  Col:1->8	 Col:2->9	 Col:3->10\t\n"
+             "Row: 4 ->  Col:2->11	 Col:3->12\t\n";
 
-    EXPECT_EQ( printed.str(), couted.str() );
+   EXPECT_EQ( printed.str(), couted.str() );
 }
 
 #endif
-- 
GitLab


From 52c9d170fd45dd27850c89d9d280cb1b5515c4bc Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Fri, 7 Feb 2020 15:29:18 +0100
Subject: [PATCH 134/179] Added Array constructor with size and value.

---
 src/TNL/Containers/Array.h           |  9 +++++++++
 src/TNL/Containers/Array.hpp         | 12 ++++++++++++
 src/UnitTests/Containers/ArrayTest.h |  5 +++++
 3 files changed, 26 insertions(+)

diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h
index a9705e66f..bf69f4888 100644
--- a/src/TNL/Containers/Array.h
+++ b/src/TNL/Containers/Array.h
@@ -137,6 +137,15 @@ class Array
        */
       explicit Array( const IndexType& size, const AllocatorType& allocator = AllocatorType() );
 
+      /**
+       * \brief Constructs an array with given size and value.
+       *
+       * \param size The number of array elements to be allocated.
+       * \param value The value all elements will be set to.
+       * \param allocator The allocator to be associated with this array.
+       */
+      explicit Array( const IndexType& size, const Value& value, const AllocatorType& allocator = AllocatorType() );
+
       /**
        * \brief Constructs an array with given size and copies data from given
        * pointer.
diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp
index 2a60986f5..4dd8d5a2f 100644
--- a/src/TNL/Containers/Array.hpp
+++ b/src/TNL/Containers/Array.hpp
@@ -62,6 +62,18 @@ Array( const IndexType& size, const AllocatorType& allocator )
    this->setSize( size );
 }
 
+template< typename Value,
+          typename Device,
+          typename Index,
+          typename Allocator >
+Array< Value, Device, Index, Allocator >::
+Array( const IndexType& size, const Value& value, const AllocatorType& allocator )
+: allocator( allocator )
+{
+   this->setSize( size );
+   ( *this ) = value;
+}
+
 template< typename Value,
           typename Device,
           typename Index,
diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h
index ef3119365..255a67fb9 100644
--- a/src/UnitTests/Containers/ArrayTest.h
+++ b/src/UnitTests/Containers/ArrayTest.h
@@ -135,6 +135,11 @@ TYPED_TEST( ArrayTest, constructors )
    v = 0;
    EXPECT_EQ( v.getSize(), 10 );
 
+   ArrayType vv( 10, 4 );
+   EXPECT_EQ( vv.getSize(), 10 );
+   for( int i = 0; i < 10; i++ )
+      EXPECT_EQ( vv.getElement( i ), 4 );
+
    // deep copy
    ArrayType w( v );
    EXPECT_NE( w.getData(), v.getData() );
-- 
GitLab


From 5d79ecbc2e7470ae246cd568b3b389a37514111d Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Fri, 7 Feb 2020 23:02:07 +0100
Subject: [PATCH 135/179] Added SparseMatrix constructors with initializer
 lists.

---
 src/TNL/Matrices/SparseMatrix.h           |  11 +++
 src/TNL/Matrices/SparseMatrix.hpp         |  42 ++++++++
 src/UnitTests/Matrices/SparseMatrixTest.h | 115 ++++++++++++++++------
 3 files changed, 136 insertions(+), 32 deletions(-)

diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 7072ce3c4..15f585716 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -74,6 +74,17 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
                     const RealAllocatorType& realAllocator = RealAllocatorType(),
                     const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
 
+      SparseMatrix( const std::initializer_list< std::tuple< IndexType > >& rowCapacities,
+                    const IndexType columns,
+                    const RealAllocatorType& realAllocator = RealAllocatorType(),
+                    const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
+
+      SparseMatrix( const IndexType rows,
+                    const IndexType columns,
+                    const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data,
+                    const RealAllocatorType& realAllocator = RealAllocatorType(),
+                    const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
+
       ViewType getView() const; // TODO: remove const
 
       ConstViewType getConstView() const;
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 992443434..938d883af 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -73,6 +73,48 @@ SparseMatrix( const IndexType rows,
 {
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+SparseMatrix( const std::initializer_list< std::tuple< IndexType > >& rowCapacities,
+              const IndexType columns,
+              const RealAllocatorType& realAllocator,
+              const IndexAllocatorType& indexAllocator )
+: BaseType( rowCapacities.size(), columns, realAllocator ), columnIndexes( indexAllocator )
+{
+   this->setCompressedRowLengths( RowCapacitiesType ( rowCapacities ) );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+SparseMatrix( const IndexType rows,
+              const IndexType columns,
+              const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data,
+              const RealAllocatorType& realAllocator,
+              const IndexAllocatorType& indexAllocator )
+: BaseType( rows, columns, realAllocator ), columnIndexes( indexAllocator )
+{
+   Containers::Vector< IndexType, Devices::Host, IndexType > rowCapacities( rows, 0 );
+   for( const auto& i : data )
+      rowCapacities[ std::get< 0 >( i ) ]++;
+   SparseMatrix< Real, Devices::Host, Index, MatrixType, Segments > hostMatrix( rows, columns );
+   hostMatrix.setCompressedRowLength( rowCapacities );
+   for( const auto& i : data )
+      hostMatrix.setElement( std::get< 0 >( i ), std::get< 1 >( i ), std::get< 2 >( i ) );
+   ( *this ) = hostMatrix;
+}
+
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h
index 04a9b065f..26b15fafd 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest.h
@@ -36,6 +36,79 @@ void cuda_test_GetType()
    std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
 }
 
+template< typename Matrix >
+void test_Constructors()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   Matrix m1( 5, 6 );
+   EXPECT_EQ( m1.getRows(), 5 );
+   EXPECT_EQ( m1.getColumns(), 6 );
+
+   Matrix m2( {1, 2, 2, 2, 1 }, 5 );
+   typename Matrix::RowCapacitiesType v1, v2{ 1, 2, 2, 2, 1 }; 
+   m2.getCompressedRowLength( v1 );
+   EXPECT_EQ( v1, v2 );
+
+   /*
+    * Sets up the following 6x5 sparse matrix:
+    *
+    *    /  1  2  3  0  0 \
+    *    |  0  4  5  6  0 |
+    *    |  0  0  7  8  9 |
+    *    | 10  0  0  0  0 |
+    *    |  0 11  0  0  0 |
+    *    \  0  0  0 12  0 /
+    */
+
+   Matrix m3( 6, 5, {
+      { 0, 0,  1.0 }, { 0, 1, 2.0 }, { 0, 2, 3.0 },
+      { 1, 1,  4.0 }, { 1, 2, 5.0 }, { 1, 3, 6.0 },
+      { 2, 2,  7.0 }, { 2, 3, 8.0 }, { 2, 4, 9.0 },
+      { 3, 0, 10.0 },
+      { 4, 1, 11.0 },
+      { 5, 3, 12.0 } } );
+
+   // Check the set elements
+   EXPECT_EQ( m3.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m3.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m3.getElement( 0, 2 ),  3 );
+   EXPECT_EQ( m3.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m3.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m3.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m3.getElement( 1, 1 ),  4 );
+   EXPECT_EQ( m3.getElement( 1, 2 ),  5 );
+   EXPECT_EQ( m3.getElement( 1, 3 ),  6 );
+   EXPECT_EQ( m3.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m3.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m3.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m3.getElement( 2, 2 ),  7 );
+   EXPECT_EQ( m3.getElement( 2, 3 ),  8 );
+   EXPECT_EQ( m3.getElement( 2, 4 ),  9 );
+
+   EXPECT_EQ( m3.getElement( 3, 0 ), 10 );
+   EXPECT_EQ( m3.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m3.getElement( 3, 2 ),  0 );
+   EXPECT_EQ( m3.getElement( 3, 3 ),  0 );
+   EXPECT_EQ( m3.getElement( 3, 4 ),  0 );
+
+   EXPECT_EQ( m3.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m3.getElement( 4, 1 ), 11 );
+   EXPECT_EQ( m3.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m3.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m3.getElement( 4, 4 ),  0 );
+
+   EXPECT_EQ( m3.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m3.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m3.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m3.getElement( 5, 3 ), 12 );
+   EXPECT_EQ( m3.getElement( 5, 4 ),  0 );
+}
+
 template< typename Matrix >
 void test_SetDimensions()
 {
@@ -64,9 +137,7 @@ void test_SetCompressedRowLengths()
    const IndexType cols = 11;
 
    Matrix m( rows, cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths;
-   rowLengths.setSize( rows );
-   rowLengths = 3;
+   typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 );
 
    IndexType rowLength = 1;
    for( IndexType i = 2; i < rows; i++ )
@@ -592,8 +663,7 @@ void test_AddElement()
    const IndexType cols = 5;
 
    Matrix m( rows, cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths( rows );
-   rowLengths = 3;
+   typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 );
    m.setCompressedRowLengths( rowLengths );
 
    RealType value = 1;
@@ -742,12 +812,7 @@ void test_VectorProduct()
    Matrix m_1;
    m_1.reset();
    m_1.setDimensions( m_rows_1, m_cols_1 );
-   typename Matrix::CompressedRowLengthsVector rowLengths_1;
-   rowLengths_1.setSize( m_rows_1 );
-   rowLengths_1.setElement( 0, 1 );
-   rowLengths_1.setElement( 1, 2 );
-   rowLengths_1.setElement( 2, 1 );
-   rowLengths_1.setElement( 3, 1 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_1{ 1, 2, 1, 1 };
    m_1.setCompressedRowLengths( rowLengths_1 );
 
    RealType value_1 = 1;
@@ -770,10 +835,8 @@ void test_VectorProduct()
    for( IndexType j = 0; j < outVector_1.getSize(); j++ )
        outVector_1.setElement( j, 0 );
 
-
    m_1.vectorProduct( inVector_1, outVector_1 );
 
-
    EXPECT_EQ( outVector_1.getElement( 0 ),  2 );
    EXPECT_EQ( outVector_1.getElement( 1 ), 10 );
    EXPECT_EQ( outVector_1.getElement( 2 ),  8 );
@@ -793,21 +856,18 @@ void test_VectorProduct()
 
    Matrix m_2( m_rows_2, m_cols_2 );
    typename Matrix::CompressedRowLengthsVector rowLengths_2{ 3, 1, 3, 1 };
-   /*rowLengths_2 = 3;
-   rowLengths_2.setElement( 1, 1 );
-   rowLengths_2.setElement( 3, 1 );*/
    m_2.setCompressedRowLengths( rowLengths_2 );
 
    RealType value_2 = 1;
-   for( IndexType i = 0; i < 3; i++ )   // 0th row
+   for( IndexType i = 0; i < 3; i++ )      // 0th row
       m_2.setElement( 0, i, value_2++ );
 
    m_2.setElement( 1, 3, value_2++ );      // 1st row
 
-   for( IndexType i = 0; i < 3; i++ )   // 2nd row
+   for( IndexType i = 0; i < 3; i++ )      // 2nd row
       m_2.setElement( 2, i, value_2++ );
 
-   for( IndexType i = 1; i < 2; i++ )       // 3rd row
+   for( IndexType i = 1; i < 2; i++ )      // 3rd row
       m_2.setElement( 3, i, value_2++ );
 
    VectorType inVector_2;
@@ -891,11 +951,6 @@ void test_VectorProduct()
 
    Matrix m_4( m_rows_4, m_cols_4 );
    typename Matrix::CompressedRowLengthsVector rowLengths_4{ 4, 4, 5, 4, 4, 4, 5, 5 };
-   /*rowLengths_4.setSize( m_rows_4 );
-   rowLengths_4.setValue( 4 );
-   rowLengths_4.setElement( 2, 5 );
-   rowLengths_4.setElement( 6, 5 );
-   rowLengths_4.setElement( 7, 5 );*/
    m_4.setCompressedRowLengths( rowLengths_4 );
 
    RealType value_4 = 1;
@@ -1137,8 +1192,7 @@ void test_PerformSORIteration()
    const IndexType m_cols = 4;
 
    Matrix m( m_rows, m_cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows );
-   rowLengths = 3;
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );
    m.setCompressedRowLengths( rowLengths );
 
    m.setElement( 0, 0, 4.0 );        // 0th row
@@ -1210,8 +1264,7 @@ void test_SaveAndLoad( const char* filename )
    const IndexType m_cols = 4;
 
    Matrix savedMatrix( m_rows, m_cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows );
-   rowLengths = 3;
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );
    savedMatrix.setCompressedRowLengths( rowLengths );
 
    RealType value = 1;
@@ -1230,8 +1283,7 @@ void test_SaveAndLoad( const char* filename )
    ASSERT_NO_THROW( savedMatrix.save( filename ) );
 
    Matrix loadedMatrix( m_rows, m_cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths2( m_rows );
-   rowLengths2 = 3;
+   typename Matrix::CompressedRowLengthsVector rowLengths2( m_rows, 3 );
    loadedMatrix.setCompressedRowLengths( rowLengths2 );
 
    ASSERT_NO_THROW( loadedMatrix.load( filename ) );
@@ -1300,8 +1352,7 @@ void test_Print()
    const IndexType m_cols = 4;
 
    Matrix m( m_rows, m_cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows );
-   rowLengths = 3;
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );
    m.setCompressedRowLengths( rowLengths );
 
    RealType value = 1;
-- 
GitLab


From c869e97679ebe72877d1ce94c19a1a5ccb9aae13 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Sat, 8 Feb 2020 11:17:15 +0100
Subject: [PATCH 136/179] Fixing sparse matrix constructors with initializer
 list together with unit tests.

---
 src/TNL/Matrices/SparseMatrix.h               |  2 +-
 src/TNL/Matrices/SparseMatrix.hpp             |  6 ++---
 src/UnitTests/Matrices/SparseMatrixTest.h     | 25 +++++++++++++++----
 src/UnitTests/Matrices/SparseMatrixTest_CSR.h |  7 ++++++
 .../Matrices/SparseMatrixTest_Ellpack.h       |  7 ++++++
 .../Matrices/SparseMatrixTest_SlicedEllpack.h |  7 ++++++
 6 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 15f585716..573d382ce 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -74,7 +74,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
                     const RealAllocatorType& realAllocator = RealAllocatorType(),
                     const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
 
-      SparseMatrix( const std::initializer_list< std::tuple< IndexType > >& rowCapacities,
+      SparseMatrix( const std::initializer_list< IndexType >& rowCapacities,
                     const IndexType columns,
                     const RealAllocatorType& realAllocator = RealAllocatorType(),
                     const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 938d883af..df841230c 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -81,13 +81,13 @@ template< typename Real,
           typename RealAllocator,
           typename IndexAllocator >
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-SparseMatrix( const std::initializer_list< std::tuple< IndexType > >& rowCapacities,
+SparseMatrix( const std::initializer_list< IndexType >& rowCapacities,
               const IndexType columns,
               const RealAllocatorType& realAllocator,
               const IndexAllocatorType& indexAllocator )
 : BaseType( rowCapacities.size(), columns, realAllocator ), columnIndexes( indexAllocator )
 {
-   this->setCompressedRowLengths( RowCapacitiesType ( rowCapacities ) );
+   this->setCompressedRowLengths( RowsCapacitiesType( rowCapacities ) );
 }
 
 template< typename Real,
@@ -109,7 +109,7 @@ SparseMatrix( const IndexType rows,
    for( const auto& i : data )
       rowCapacities[ std::get< 0 >( i ) ]++;
    SparseMatrix< Real, Devices::Host, Index, MatrixType, Segments > hostMatrix( rows, columns );
-   hostMatrix.setCompressedRowLength( rowCapacities );
+   hostMatrix.setCompressedRowLengths( rowCapacities );
    for( const auto& i : data )
       hostMatrix.setElement( std::get< 0 >( i ), std::get< 1 >( i ), std::get< 2 >( i ) );
    ( *this ) = hostMatrix;
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h
index 26b15fafd..72eb0b33b 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest.h
@@ -48,8 +48,17 @@ void test_Constructors()
    EXPECT_EQ( m1.getColumns(), 6 );
 
    Matrix m2( {1, 2, 2, 2, 1 }, 5 );
-   typename Matrix::RowCapacitiesType v1, v2{ 1, 2, 2, 2, 1 }; 
-   m2.getCompressedRowLength( v1 );
+   typename Matrix::RowsCapacitiesType v1, v2{ 1, 2, 2, 2, 1 }; 
+   m2.setElement( 0, 0, 1 );   // 0th row
+   m2.setElement( 1, 0, 1 );   // 1st row
+   m2.setElement( 1, 1, 1 );   
+   m2.setElement( 2, 1, 1 );   // 2nd row
+   m2.setElement( 2, 2, 1 );
+   m2.setElement( 3, 2, 1 );   // 3rd row
+   m2.setElement( 3, 3, 1 );
+   m2.setElement( 4, 4, 1 );   // 4th row
+   m2.getCompressedRowLengths( v1 );
+   
    EXPECT_EQ( v1, v2 );
 
    /*
@@ -662,8 +671,14 @@ void test_AddElement()
    const IndexType rows = 6;
    const IndexType cols = 5;
 
-   Matrix m( rows, cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 );
+   Matrix m( rows, cols, {
+      { 0, 0,  1 }, { 0, 1,  2 }, { 0, 2, 3 },
+                    { 1, 1,  4 }, { 1, 2, 5 }, { 1, 3,  6 },
+                                  { 2, 2, 7 }, { 2, 3,  8 }, { 2, 4, 9 },
+      { 3, 0, 10 }, { 3, 1,  0 }, { 3, 2, 0 },
+                    { 4, 1, 11 }, { 4, 2, 0 }, { 4, 3,  0 },
+                                  { 5, 2, 0 }, { 5, 3, 12 }, { 5, 4, 0 } } );
+   /*typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 );
    m.setCompressedRowLengths( rowLengths );
 
    RealType value = 1;
@@ -680,7 +695,7 @@ void test_AddElement()
 
    m.setElement( 4, 1, value++ );      // 4th row
 
-   m.setElement( 5, 3, value++ );      // 5th row
+   m.setElement( 5, 3, value++ );      // 5th row*/
 
 
    // Check the set elements
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
index 781735e7f..f029c3bc7 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
@@ -59,6 +59,13 @@ using CSRMatrixTypes = ::testing::Types
 
 TYPED_TEST_SUITE( CSRMatrixTest, CSRMatrixTypes);
 
+TYPED_TEST( CSRMatrixTest, Constructors )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_Constructors< CSRMatrixType >();
+}
+
 TYPED_TEST( CSRMatrixTest, setDimensionsTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
index 9650105f6..2bf5fe20d 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
@@ -70,6 +70,13 @@ using EllpackMatrixTypes = ::testing::Types
 
 TYPED_TEST_SUITE( EllpackMatrixTest, EllpackMatrixTypes);
 
+TYPED_TEST( EllpackMatrixTest, Constructors )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_Constructors< EllpackMatrixType >();
+}
+
 TYPED_TEST( EllpackMatrixTest, setDimensionsTest )
 {
     using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
index 11365cc5b..190839fd5 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
@@ -71,6 +71,13 @@ using SlicedEllpackMatrixTypes = ::testing::Types
 
 TYPED_TEST_SUITE( SlicedEllpackMatrixTest, SlicedEllpackMatrixTypes);
 
+TYPED_TEST( SlicedEllpackMatrixTest, Constructors )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_Constructors< SlicedEllpackMatrixType >();
+}
+
 TYPED_TEST( SlicedEllpackMatrixTest, setDimensionsTest )
 {
     using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-- 
GitLab


From 57c3a68104de9a748293cbaac0ae975f8bf19a58 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Sat, 8 Feb 2020 14:23:27 +0100
Subject: [PATCH 137/179] Implementing symmetric sparse matrix unit tests.

---
 .../Matrices/SymmetricSparseMatrixTest.h      | 1304 +++++++++++++++++
 1 file changed, 1304 insertions(+)
 create mode 100644 src/UnitTests/Matrices/SymmetricSparseMatrixTest.h

diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
new file mode 100644
index 000000000..fd6bd8464
--- /dev/null
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
@@ -0,0 +1,1304 @@
+/***************************************************************************
+                          SymmetricSparseMatrixTest.h -  description
+                             -------------------
+    begin                : Feb 7, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Math.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <iostream>
+#include <sstream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+template< typename MatrixHostFloat, typename MatrixHostInt >
+void host_test_GetType()
+{
+   bool testRan = false;
+   EXPECT_TRUE( testRan );
+   std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+   std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+}
+
+template< typename MatrixCudaFloat, typename MatrixCudaInt >
+void cuda_test_GetType()
+{
+   bool testRan = false;
+   EXPECT_TRUE( testRan );
+   std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+   std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+}
+
+template< typename Matrix >
+void test_SetDimensions()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   const IndexType rows = 9;
+   const IndexType cols = 8;
+
+   Matrix m;
+   m.setDimensions( rows, cols );
+
+   EXPECT_EQ( m.getRows(), 9 );
+   EXPECT_EQ( m.getColumns(), 8 );
+}
+
+template< typename Matrix >
+void test_SetCompressedRowLengths()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    /  1  2  4  7                       \
+    |  2  3  5  8 10 13 16 19           |
+    |  4  5  6    11 14       21 24 27  |
+    |  7  8     9       17 20           |
+    |    10 11    12          22 25     |
+    |    13 14       15             28  |
+    |    16    17       18              |
+    |    19    20          21           |
+    |       21    22          23        |
+    |       24    25             26     |
+    \       27       28             30  /
+    */
+   const IndexType rows = 10;
+   const IndexType cols = 11;
+
+   Matrix m( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths { 1, 2, 3, 3, 3, 3, 3, 3, 3, 3  };
+   m.setCompressedRowLengths( rowLengths );
+
+   // Insert values into the rows.
+   RealType value = 1;
+
+   // 0th row - lower part
+   m.setElement( 0, 0, value++ );
+
+   // 1st row - lower part
+   m.setElement( 1, 0, value++ );
+   m.setElement( 1, 1, value++ );
+
+   // 2nd row - lower part
+   m.setElement( 2, 0, value++ );
+   m.setElement( 2, 1, value++ );
+   m.setElement( 2, 2, value++ );
+
+   // 3rd row - lower part
+   m.setElement( 3, 0, value++ );
+   m.setElement( 3, 1, value++ );
+   m.setElement( 3, 3, value++ );
+
+   // 4th row - lower part
+   m.setElement( 4, 1, value++ );
+   m.setElement( 4, 2, value++ );
+   m.setElement( 4, 4, value++ );
+
+   // 5th row - lower part
+   m.setElement( 5, 1, value++ );
+   m.setElement( 5, 2, value++ );
+   m.setElement( 5, 5, value++ );
+
+   // 6th row - lower part
+   m.setElement( 6, 1, value++ );
+   m.setElement( 6, 3, value++ );
+   m.setElement( 6, 6, value++ );
+
+   // 7th row - lower part
+   m.setElement( 7, 1, value++ );
+   m.setElement( 7, 3, value++ );
+   m.setElement( 7, 7, value++ );
+
+   rowLengths = 0;
+   m.getCompressedRowLengths( rowLengths );
+   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 1, 2, 3, 3, 3, 3, 3, 3, 3, 3 };
+   EXPECT_EQ( rowLengths, correctRowLengths );
+}
+
+template< typename Matrix1, typename Matrix2 >
+void test_SetLike()
+{
+   using RealType = typename Matrix1::RealType;
+   using DeviceType = typename Matrix1::DeviceType;
+   using IndexType = typename Matrix1::IndexType;
+
+   const IndexType rows = 8;
+   const IndexType cols = 7;
+
+   Matrix1 m1( rows + 1, cols + 2 );
+   Matrix2 m2( rows, cols );
+
+   m1.setLike( m2 );
+
+   EXPECT_EQ( m1.getRows(), m2.getRows() );
+   EXPECT_EQ( m1.getColumns(), m2.getColumns() );
+}
+
+template< typename Matrix >
+void test_GetNumberOfNonzeroMatrixElements()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    /  1  2  4  7                       \ -> 4
+    |  2  3  5  8 10 13 16 19           | -> 8
+    |  4  5  6    11 14       21 25 28  | -> 8
+    |  7  8     9       17 20           | -> 5
+    |    10 11    12          23 26     | -> 5
+    |    13 14       15             29  | -> 4
+    |    16    17       18              | -> 3
+    |    19    20          21           | -> 3
+    |       22    23          24        | -> 3
+    |       25    26             27     | -> 3
+    \       28       29             30  / -> 3
+                                          ----
+                                            49
+    */
+
+   const IndexType rows = 10;
+   const IndexType cols = 10;
+
+   Matrix m( rows, cols, {
+      { 0, 0,  1 },
+      { 1, 0,  2 }, { 1, 1,  3 },
+      { 2, 0,  4 }, { 2, 1,  5 }, {  2, 2,  6 },
+      { 3, 0,  7 }, { 3, 1,  8 },              , { 3, 3,  9 },
+                    { 4, 1, 10 }, {  4, 2, 11 },               { 4, 4, 12 },
+                    { 5, 1, 13 }, {  5, 2, 14 },                              {  5, 5, 15 },
+                    { 6, 1, 16 },                { 6, 3, 17 },                              { 6, 6, 18 },
+                    { 7, 1, 19 },                { 7, 3, 20 },                                            { 7, 7, 21 },
+                                  {  8, 2, 22 },               { 8, 4, 23 },                                           { 8, 8, 24 },
+                                  {  9, 2, 25 },               { 9, 4, 26 },                                                         { 9, 9, 27 }
+                                  { 10, 2, 28 },                              { 10, 4, 29 },                                                      { 10, 10, 30 }
+   } );
+
+   EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 49 );
+}
+
+template< typename Matrix >
+void test_Reset()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 5x4 sparse matrix:
+    *
+    *    /  0  0  0  0 \
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    \  0  0  0  0 /
+    */
+
+   const IndexType rows = 5;
+   const IndexType cols = 4;
+
+   Matrix m( rows, cols );
+   m.reset();
+
+   EXPECT_EQ( m.getRows(), 0 );
+   EXPECT_EQ( m.getColumns(), 0 );
+}
+
+template< typename Matrix >
+void test_GetRow()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   /*
+    * Sets up the following 11x11 sparse matrix:
+    *
+    /  1  2  4  7                       \
+    |  2  3  5  8 10 13 16 19           |
+    |  4  5  6    11 14       22 25 28  |
+    |  7  8     9       17 20           |
+    |    10 11    12          23 26     |
+    |    13 14       15             29  |
+    |    16    17       18              |
+    |    19    20          21           |
+    |       22    23          24        |
+    |       25    26             27     |
+    \       28       29             30  /
+    */
+
+   Matrix m( { 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3 }, 11 );
+
+   auto matrixView = m.getView();
+   auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
+      auto row = matrixView.getRow( rowIdx );
+      RealType val;
+      switch( rowIdx )
+      {
+         case  0: row.setElement( 0, 0,  1 ); break;
+         case  1: row.setElement( 0, 0,  2 ); row.setElement( 1, 1,  3 ); break;
+         case  2: row.setElement( 0, 0,  4 ); row.setElement( 1, 1,  5 ); row.setElement( 2,  2,  6 ); break;
+         case  3: row.setElement( 0, 0,  7 ); row.setElement( 1, 1,  8 ); row.setElement( 2,  3,  9 ); break;
+         case  4: row.setElement( 0, 1, 10 ); row.setElement( 1, 2, 11 ); row.setElement( 2,  4, 12 ); break;
+         case  5: row.setElement( 0, 1, 13 ); row.setElement( 1, 2, 14 ); row.setElement( 2,  5, 15 ); break;
+         case  6: row.setElement( 0, 1, 16 ); row.setElement( 1, 3, 17 ); row.setElement( 2,  6, 18 ); break;
+         case  7: row.setElement( 0, 1, 19 ); row.setElement( 1, 3, 20 ); row.setElement( 2,  7, 21 ); break;
+         case  8: row.setElement( 0, 2, 22 ); row.setElement( 1, 4, 23 ); row.setElement( 2,  8, 24 ); break;
+         case  9: row.setElement( 0, 2, 25 ); row.setElement( 1, 4, 26 ); row.setElement( 2,  9, 27 ); break;
+         case 10: row.setElement( 0, 2, 28 ); row.setElement( 1, 5, 29 ); row.setElement( 2, 10, 30 ); break;
+      }
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f );
+
+   EXPECT_EQ( m.getElement( 0,  0 ),  1 );
+   EXPECT_EQ( m.getElement( 0,  1 ),  2 );
+   EXPECT_EQ( m.getElement( 0,  2 ),  4 );
+   EXPECT_EQ( m.getElement( 0,  3 ),  7 );
+   EXPECT_EQ( m.getElement( 0,  4 ),  0 );
+   EXPECT_EQ( m.getElement( 0,  5 ),  0 );
+   EXPECT_EQ( m.getElement( 0,  6 ),  0 );
+   EXPECT_EQ( m.getElement( 0,  7 ),  0 );
+   EXPECT_EQ( m.getElement( 0,  8 ),  0 );
+   EXPECT_EQ( m.getElement( 0,  9 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 10 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1,  0 ),  2 );
+   EXPECT_EQ( m.getElement( 1,  1 ),  3 );
+   EXPECT_EQ( m.getElement( 1,  2 ),  4 );
+   EXPECT_EQ( m.getElement( 1,  3 ),  8 );
+   EXPECT_EQ( m.getElement( 1,  4 ), 10 );
+   EXPECT_EQ( m.getElement( 1,  5 ), 13 );
+   EXPECT_EQ( m.getElement( 1,  6 ), 16 );
+   EXPECT_EQ( m.getElement( 1,  7 ), 19 );
+   EXPECT_EQ( m.getElement( 1,  8 ),  0 );
+   EXPECT_EQ( m.getElement( 1,  9 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 10 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2,  0 ),  4 );
+   EXPECT_EQ( m.getElement( 2,  1 ),  5 );
+   EXPECT_EQ( m.getElement( 2,  2 ),  6 );
+   EXPECT_EQ( m.getElement( 2,  3 ),  0 );
+   EXPECT_EQ( m.getElement( 2,  4 ), 11 );
+   EXPECT_EQ( m.getElement( 2,  5 ), 14 );
+   EXPECT_EQ( m.getElement( 2,  6 ),  0 );
+   EXPECT_EQ( m.getElement( 2,  7 ),  0 );
+   EXPECT_EQ( m.getElement( 2,  8 ), 22 );
+   EXPECT_EQ( m.getElement( 2,  9 ), 25 );
+   EXPECT_EQ( m.getElement( 2, 10 ), 28 );
+
+   EXPECT_EQ( m.getElement( 3,  0 ),  7 );
+   EXPECT_EQ( m.getElement( 3,  1 ),  8 );
+   EXPECT_EQ( m.getElement( 3,  2 ),  0 );
+   EXPECT_EQ( m.getElement( 3,  3 ),  9 );
+   EXPECT_EQ( m.getElement( 3,  4 ),  0 );
+   EXPECT_EQ( m.getElement( 3,  5 ),  0 );
+   EXPECT_EQ( m.getElement( 3,  6 ), 17 );
+   EXPECT_EQ( m.getElement( 3,  7 ), 20 );
+   EXPECT_EQ( m.getElement( 3,  8 ),  0 );
+   EXPECT_EQ( m.getElement( 3,  9 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 10 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4,  0 ),  0 );
+   EXPECT_EQ( m.getElement( 4,  1 ), 10 );
+   EXPECT_EQ( m.getElement( 4,  2 ), 11 );
+   EXPECT_EQ( m.getElement( 4,  3 ),  0 );
+   EXPECT_EQ( m.getElement( 4,  4 ), 12 );
+   EXPECT_EQ( m.getElement( 4,  5 ),  0 );
+   EXPECT_EQ( m.getElement( 4,  6 ),  0 );
+   EXPECT_EQ( m.getElement( 4,  7 ),  0 );
+   EXPECT_EQ( m.getElement( 4,  8 ), 23 );
+   EXPECT_EQ( m.getElement( 4,  9 ), 26 );
+   EXPECT_EQ( m.getElement( 4, 10 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5,  0 ),  0 );
+   EXPECT_EQ( m.getElement( 5,  1 ), 13 );
+   EXPECT_EQ( m.getElement( 5,  2 ), 14 );
+   EXPECT_EQ( m.getElement( 5,  3 ),  0 );
+   EXPECT_EQ( m.getElement( 5,  4 ),  0 );
+   EXPECT_EQ( m.getElement( 5,  5 ), 15 );
+   EXPECT_EQ( m.getElement( 5,  6 ),  0 );
+   EXPECT_EQ( m.getElement( 5,  7 ),  0 );
+   EXPECT_EQ( m.getElement( 5,  8 ),  0 );
+   EXPECT_EQ( m.getElement( 5,  9 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 10 ), 29 );
+
+   EXPECT_EQ( m.getElement( 6,  0 ),  0 );
+   EXPECT_EQ( m.getElement( 6,  1 ), 16 );
+   EXPECT_EQ( m.getElement( 6,  2 ),  0 );
+   EXPECT_EQ( m.getElement( 6,  3 ), 17 );
+   EXPECT_EQ( m.getElement( 6,  4 ),  0 );
+   EXPECT_EQ( m.getElement( 6,  5 ),  0 );
+   EXPECT_EQ( m.getElement( 6,  6 ), 18 );
+   EXPECT_EQ( m.getElement( 6,  7 ),  0 );
+   EXPECT_EQ( m.getElement( 6,  8 ),  0 );
+   EXPECT_EQ( m.getElement( 6,  9 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 10 ),  0 );
+
+   EXPECT_EQ( m.getElement( 7,  0 ),  0 );
+   EXPECT_EQ( m.getElement( 7,  1 ), 19 );
+   EXPECT_EQ( m.getElement( 7,  2 ),  0 );
+   EXPECT_EQ( m.getElement( 7,  3 ), 20 );
+   EXPECT_EQ( m.getElement( 7,  4 ),  0 );
+   EXPECT_EQ( m.getElement( 7,  5 ),  0 );
+   EXPECT_EQ( m.getElement( 7,  6 ),  0 );
+   EXPECT_EQ( m.getElement( 7,  7 ), 21 );
+   EXPECT_EQ( m.getElement( 7,  8 ),  0 );
+   EXPECT_EQ( m.getElement( 7,  9 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 10 ),  0 );
+
+   EXPECT_EQ( m.getElement( 8,  0 ),  0 );
+   EXPECT_EQ( m.getElement( 8,  1 ),  0 );
+   EXPECT_EQ( m.getElement( 8,  2 ), 22 );
+   EXPECT_EQ( m.getElement( 8,  3 ),  0 );
+   EXPECT_EQ( m.getElement( 8,  4 ), 23 );
+   EXPECT_EQ( m.getElement( 8,  5 ),  0 );
+   EXPECT_EQ( m.getElement( 8,  6 ),  0 );
+   EXPECT_EQ( m.getElement( 8,  7 ),  0 );
+   EXPECT_EQ( m.getElement( 8,  8 ), 24 );
+   EXPECT_EQ( m.getElement( 8,  9 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 10 ),  0 );
+
+   EXPECT_EQ( m.getElement( 9,  0 ),  0 );
+   EXPECT_EQ( m.getElement( 9,  1 ),  0 );
+   EXPECT_EQ( m.getElement( 9,  2 ), 25 );
+   EXPECT_EQ( m.getElement( 9,  3 ),  0 );
+   EXPECT_EQ( m.getElement( 9,  4 ), 26 );
+   EXPECT_EQ( m.getElement( 9,  5 ),  0 );
+   EXPECT_EQ( m.getElement( 9,  6 ),  0 );
+   EXPECT_EQ( m.getElement( 9,  7 ),  0 );
+   EXPECT_EQ( m.getElement( 9,  8 ),  0 );
+   EXPECT_EQ( m.getElement( 9,  9 ), 27 );
+   EXPECT_EQ( m.getElement( 9, 10 ),  0 );
+
+   EXPECT_EQ( m.getElement( 10,  0 ),  0 );
+   EXPECT_EQ( m.getElement( 10,  1 ),  0 );
+   EXPECT_EQ( m.getElement( 10,  2 ), 28 );
+   EXPECT_EQ( m.getElement( 10,  3 ),  0 );
+   EXPECT_EQ( m.getElement( 10,  4 ),  0 );
+   EXPECT_EQ( m.getElement( 10,  5 ), 29 );
+   EXPECT_EQ( m.getElement( 10,  6 ),  0 );
+   EXPECT_EQ( m.getElement( 10,  7 ),  0 );
+   EXPECT_EQ( m.getElement( 10,  8 ),  0 );
+   EXPECT_EQ( m.getElement( 10,  9 ),  0 );
+   EXPECT_EQ( m.getElement( 10, 10 ), 30 );
+}
+
+
+template< typename Matrix >
+void test_SetElement()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  0  4  0  0 10  0  0  0  \
+    *    |  0  2  0  5  0  0 11  0  0  0  |
+    *    |  0  0  3  6  0  0 12  0  0  0  |
+    *    |  4  5  6  7  0  0 13  0  0  0  |
+    *    |  0  0  0  0  8  0 14  0  0  0  |
+    *    |  0  0  0  0  0  9 15  0  0  0  |
+    *    | 10 11 12 13 14 15 16  0  0  0  |
+    *    |  0  0  0  0  0  0  0 17  0  0  |
+    *    |  0  0  0  0  0  0  0  0 18  0  |
+    *    \  0  0  0  0  0  0  0  0  0 19 /
+    */
+
+   Matrix m( { 1, 1, 1, 4, 1, 1, 7, 1, 1, 1 }, 10 );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < 4; i++ )
+      m.setElement( i, i, value++ );
+
+   for( IndexType i = 0; i < 4; i++ )
+      m.setElement( 3, i, value++ );
+
+   for( IndexType i = 4; i < 6; i++ )
+      m.setElement( i, i, value++ );
+
+   for( IndexType i = 0; i < 7; i++ )
+      m.setElement( 6, i, value++ );
+
+   for( IndexType i = 7; i < 10; i++ )
+      m.setElement( i, i, value++ );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  4 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 6 ), 10 );
+   EXPECT_EQ( m.getElement( 0, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 6 ), 11 );
+   EXPECT_EQ( m.getElement( 1, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  3 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  4 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  5 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  6 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  7 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 6 ), 13 );
+   EXPECT_EQ( m.getElement( 3, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  8 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 6 ), 14 );
+   EXPECT_EQ( m.getElement( 4, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  9 );
+   EXPECT_EQ( m.getElement( 5, 6 ), 15 );
+   EXPECT_EQ( m.getElement( 5, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 10 );
+   EXPECT_EQ( m.getElement( 6, 1 ), 11 );
+   EXPECT_EQ( m.getElement( 6, 2 ), 12 );
+   EXPECT_EQ( m.getElement( 6, 3 ), 13 );
+   EXPECT_EQ( m.getElement( 6, 4 ), 14 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 15 );
+   EXPECT_EQ( m.getElement( 6, 6 ), 16 );
+   EXPECT_EQ( m.getElement( 6, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 7, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 7 ), 17 );
+   EXPECT_EQ( m.getElement( 7, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 8, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 8 ), 18 );
+   EXPECT_EQ( m.getElement( 8, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 9, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 9 ), 19 );
+}
+
+template< typename Matrix >
+void test_AddElement()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 6x5 sparse matrix:
+    *
+    *    /  1  2  0  0  0 \
+    *    |  2  3  4  0  0 |
+    *    |  0  4  5  6  0 |
+    *    |  0  0  6  7  8 |
+    *    |  0  0  0  8  9 |
+    *    \  0  0  0  0 10 /
+    */
+
+   const IndexType rows = 6;
+   const IndexType cols = 5;
+
+   Matrix m( 6, 5, {
+      { 0, 0, 1 }, 
+      { 1, 0, 2 }, { 1, 1, 3 },
+                   { 2, 1, 4 }, { 2, 2, 5 },
+                                { 3, 2, 6 }, { 3, 3, 7 },
+                                             { 4, 3, 8 }, { 4, 4,  9 },
+                                                          { 5, 5, 10 } } );
+
+   // Check the set elements
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  2 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  3 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  4 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  5 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  6 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  7 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  8 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  8 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  9 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 10 );
+############################################################################
+   // Add new elements to the old elements with a multiplying factor applied to the old elements.
+   /*
+    * The following setup results in the following 6x5 sparse matrix:
+    *
+    *    /  3  6  9  0  0 \
+    *    |  0 12 15 18  0 |
+    *    |  0  0 21 24 27 |
+    *    | 30 11 12  0  0 |
+    *    |  0 35 14 15  0 |
+    *    \  0  0 16 41 18 /
+    */
+
+   RealType newValue = 1;
+   for( IndexType i = 0; i < cols - 2; i++ )         // 0th row
+      m.addElement( 0, i, newValue++, 2.0 );
+
+   for( IndexType i = 1; i < cols - 1; i++ )         // 1st row
+      m.addElement( 1, i, newValue++, 2.0 );
+
+   for( IndexType i = 2; i < cols; i++ )             // 2nd row
+      m.addElement( 2, i, newValue++, 2.0 );
+
+   for( IndexType i = 0; i < cols - 2; i++ )         // 3rd row
+      m.addElement( 3, i, newValue++, 2.0 );
+
+   for( IndexType i = 1; i < cols - 1; i++ )         // 4th row
+      m.addElement( 4, i, newValue++, 2.0 );
+
+   for( IndexType i = 2; i < cols; i++ )             // 5th row
+      m.addElement( 5, i, newValue++, 2.0 );
+
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  9 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 15 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 18 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 21 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 27 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 30 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 11 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 12 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 35 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 14 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 15 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 16 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 41 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 18 );
+}
+
+template< typename Matrix >
+void test_VectorProduct()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  0  0  0 \
+    *    |  0  2  0  3 |
+    *    |  0  4  0  0 |
+    *    \  0  0  5  0 /
+    */
+
+   const IndexType m_rows_1 = 4;
+   const IndexType m_cols_1 = 4;
+
+   Matrix m_1;
+   m_1.reset();
+   m_1.setDimensions( m_rows_1, m_cols_1 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_1;
+   rowLengths_1.setSize( m_rows_1 );
+   rowLengths_1.setElement( 0, 1 );
+   rowLengths_1.setElement( 1, 2 );
+   rowLengths_1.setElement( 2, 1 );
+   rowLengths_1.setElement( 3, 1 );
+   m_1.setCompressedRowLengths( rowLengths_1 );
+
+   RealType value_1 = 1;
+   m_1.setElement( 0, 0, value_1++ );      // 0th row
+
+   m_1.setElement( 1, 1, value_1++ );      // 1st row
+   m_1.setElement( 1, 3, value_1++ );
+
+   m_1.setElement( 2, 1, value_1++ );      // 2nd row
+
+   m_1.setElement( 3, 2, value_1++ );      // 3rd row
+
+   VectorType inVector_1;
+   inVector_1.setSize( m_cols_1 );
+   for( IndexType i = 0; i < inVector_1.getSize(); i++ )
+       inVector_1.setElement( i, 2 );
+
+   VectorType outVector_1;
+   outVector_1.setSize( m_rows_1 );
+   for( IndexType j = 0; j < outVector_1.getSize(); j++ )
+       outVector_1.setElement( j, 0 );
+
+
+   m_1.vectorProduct( inVector_1, outVector_1 );
+
+
+   EXPECT_EQ( outVector_1.getElement( 0 ),  2 );
+   EXPECT_EQ( outVector_1.getElement( 1 ), 10 );
+   EXPECT_EQ( outVector_1.getElement( 2 ),  8 );
+   EXPECT_EQ( outVector_1.getElement( 3 ), 10 );
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  0  0  0  4 |
+    *    |  5  6  7  0 |
+    *    \  0  8  0  0 /
+    */
+
+   const IndexType m_rows_2 = 4;
+   const IndexType m_cols_2 = 4;
+
+   Matrix m_2( m_rows_2, m_cols_2 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_2{ 3, 1, 3, 1 };
+   /*rowLengths_2 = 3;
+   rowLengths_2.setElement( 1, 1 );
+   rowLengths_2.setElement( 3, 1 );*/
+   m_2.setCompressedRowLengths( rowLengths_2 );
+
+   RealType value_2 = 1;
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m_2.setElement( 0, i, value_2++ );
+
+   m_2.setElement( 1, 3, value_2++ );      // 1st row
+
+   for( IndexType i = 0; i < 3; i++ )   // 2nd row
+      m_2.setElement( 2, i, value_2++ );
+
+   for( IndexType i = 1; i < 2; i++ )       // 3rd row
+      m_2.setElement( 3, i, value_2++ );
+
+   VectorType inVector_2;
+   inVector_2.setSize( m_cols_2 );
+   for( IndexType i = 0; i < inVector_2.getSize(); i++ )
+      inVector_2.setElement( i, 2 );
+
+   VectorType outVector_2;
+   outVector_2.setSize( m_rows_2 );
+   for( IndexType j = 0; j < outVector_2.getSize(); j++ )
+      outVector_2.setElement( j, 0 );
+
+   m_2.vectorProduct( inVector_2, outVector_2 );
+
+   EXPECT_EQ( outVector_2.getElement( 0 ), 12 );
+   EXPECT_EQ( outVector_2.getElement( 1 ),  8 );
+   EXPECT_EQ( outVector_2.getElement( 2 ), 36 );
+   EXPECT_EQ( outVector_2.getElement( 3 ), 16 );
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  0  4  5  6 |
+    *    |  7  8  9  0 |
+    *    \  0 10 11 12 /
+    */
+
+   const IndexType m_rows_3 = 4;
+   const IndexType m_cols_3 = 4;
+
+   Matrix m_3( m_rows_3, m_cols_3 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_3{ 3, 3, 3, 3 };
+   m_3.setCompressedRowLengths( rowLengths_3 );
+
+   RealType value_3 = 1;
+   for( IndexType i = 0; i < 3; i++ )          // 0th row
+      m_3.setElement( 0, i, value_3++ );
+
+   for( IndexType i = 1; i < 4; i++ )
+      m_3.setElement( 1, i, value_3++ );      // 1st row
+
+   for( IndexType i = 0; i < 3; i++ )          // 2nd row
+      m_3.setElement( 2, i, value_3++ );
+
+   for( IndexType i = 1; i < 4; i++ )          // 3rd row
+      m_3.setElement( 3, i, value_3++ );
+
+   VectorType inVector_3;
+   inVector_3.setSize( m_cols_3 );
+   for( IndexType i = 0; i < inVector_3.getSize(); i++ )
+      inVector_3.setElement( i, 2 );
+
+   VectorType outVector_3;
+   outVector_3.setSize( m_rows_3 );
+   for( IndexType j = 0; j < outVector_3.getSize(); j++ )
+      outVector_3.setElement( j, 0 );
+
+   m_3.vectorProduct( inVector_3, outVector_3 );
+
+   EXPECT_EQ( outVector_3.getElement( 0 ), 12 );
+   EXPECT_EQ( outVector_3.getElement( 1 ), 30 );
+   EXPECT_EQ( outVector_3.getElement( 2 ), 48 );
+   EXPECT_EQ( outVector_3.getElement( 3 ), 66 );
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  2  3  0  0  4  0  0 \
+    *    |  0  5  6  7  8  0  0  0 |
+    *    |  9 10 11 12 13  0  0  0 |
+    *    |  0 14 15 16 17  0  0  0 |
+    *    |  0  0 18 19 20 21  0  0 |
+    *    |  0  0  0 22 23 24 25  0 |
+    *    | 26 27 28 29 30  0  0  0 |
+    *    \ 31 32 33 34 35  0  0  0 /
+    */
+
+   const IndexType m_rows_4 = 8;
+   const IndexType m_cols_4 = 8;
+
+   Matrix m_4( m_rows_4, m_cols_4 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_4{ 4, 4, 5, 4, 4, 4, 5, 5 };
+   /*rowLengths_4.setSize( m_rows_4 );
+   rowLengths_4.setValue( 4 );
+   rowLengths_4.setElement( 2, 5 );
+   rowLengths_4.setElement( 6, 5 );
+   rowLengths_4.setElement( 7, 5 );*/
+   m_4.setCompressedRowLengths( rowLengths_4 );
+
+   RealType value_4 = 1;
+   for( IndexType i = 0; i < 3; i++ )       // 0th row
+      m_4.setElement( 0, i, value_4++ );
+
+   m_4.setElement( 0, 5, value_4++ );
+
+   for( IndexType i = 1; i < 5; i++ )       // 1st row
+      m_4.setElement( 1, i, value_4++ );
+
+   for( IndexType i = 0; i < 5; i++ )       // 2nd row
+      m_4.setElement( 2, i, value_4++ );
+
+   for( IndexType i = 1; i < 5; i++ )       // 3rd row
+      m_4.setElement( 3, i, value_4++ );
+
+   for( IndexType i = 2; i < 6; i++ )       // 4th row
+      m_4.setElement( 4, i, value_4++ );
+
+   for( IndexType i = 3; i < 7; i++ )       // 5th row
+      m_4.setElement( 5, i, value_4++ );
+
+   for( IndexType i = 0; i < 5; i++ )       // 6th row
+      m_4.setElement( 6, i, value_4++ );
+
+   for( IndexType i = 0; i < 5; i++ )       // 7th row
+      m_4.setElement( 7, i, value_4++ );
+
+   VectorType inVector_4;
+   inVector_4.setSize( m_cols_4 );
+   for( IndexType i = 0; i < inVector_4.getSize(); i++ )
+      inVector_4.setElement( i, 2 );
+
+   VectorType outVector_4;
+   outVector_4.setSize( m_rows_4 );
+   for( IndexType j = 0; j < outVector_4.getSize(); j++ )
+      outVector_4.setElement( j, 0 );
+
+   m_4.vectorProduct( inVector_4, outVector_4 );
+
+   EXPECT_EQ( outVector_4.getElement( 0 ),  20 );
+   EXPECT_EQ( outVector_4.getElement( 1 ),  52 );
+   EXPECT_EQ( outVector_4.getElement( 2 ), 110 );
+   EXPECT_EQ( outVector_4.getElement( 3 ), 124 );
+   EXPECT_EQ( outVector_4.getElement( 4 ), 156 );
+   EXPECT_EQ( outVector_4.getElement( 5 ), 188 );
+   EXPECT_EQ( outVector_4.getElement( 6 ), 280 );
+   EXPECT_EQ( outVector_4.getElement( 7 ), 330 );
+
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  2  3  0  4  5  0  1 \   6
+    *    |  0  6  0  7  0  0  0  1 |   3
+    *    |  0  8  9  0 10  0  0  1 |   4
+    *    |  0 11 12 13 14  0  0  1 |   5
+    *    |  0 15  0  0  0  0  0  1 |   2
+    *    |  0 16 17 18 19 20 21  1 |   7
+    *    | 22 23 24 25 26 27 28  1 |   8
+    *    \ 29 30 31 32 33 34 35 36 /   8
+    */
+
+   const IndexType m_rows_5 = 8;
+   const IndexType m_cols_5 = 8;
+
+   Matrix m_5( m_rows_5, m_cols_5 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_5{ 6, 3, 4, 5, 2, 7, 8, 8 };
+   m_5.setCompressedRowLengths( rowLengths_5 );
+
+   RealType value_5 = 1;
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m_5.setElement( 0, i, value_5++ );
+
+   m_5.setElement( 0, 4, value_5++ );           // 0th row
+   m_5.setElement( 0, 5, value_5++ );
+
+   m_5.setElement( 1, 1, value_5++ );           // 1st row
+   m_5.setElement( 1, 3, value_5++ );
+
+   for( IndexType i = 1; i < 3; i++ )            // 2nd row
+      m_5.setElement( 2, i, value_5++ );
+
+   m_5.setElement( 2, 4, value_5++ );           // 2nd row
+
+   for( IndexType i = 1; i < 5; i++ )            // 3rd row
+      m_5.setElement( 3, i, value_5++ );
+
+   m_5.setElement( 4, 1, value_5++ );           // 4th row
+
+   for( IndexType i = 1; i < 7; i++ )            // 5th row
+      m_5.setElement( 5, i, value_5++ );
+
+   for( IndexType i = 0; i < 7; i++ )            // 6th row
+      m_5.setElement( 6, i, value_5++ );
+
+   for( IndexType i = 0; i < 8; i++ )            // 7th row
+      m_5.setElement( 7, i, value_5++ );
+
+   for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
+      m_5.setElement( i, 7, 1);
+
+   VectorType inVector_5;
+   inVector_5.setSize( m_cols_5 );
+   for( IndexType i = 0; i < inVector_5.getSize(); i++ )
+       inVector_5.setElement( i, 2 );
+
+   VectorType outVector_5;
+   outVector_5.setSize( m_rows_5 );
+   for( IndexType j = 0; j < outVector_5.getSize(); j++ )
+       outVector_5.setElement( j, 0 );
+
+   m_5.vectorProduct( inVector_5, outVector_5 );
+
+   EXPECT_EQ( outVector_5.getElement( 0 ),  32 );
+   EXPECT_EQ( outVector_5.getElement( 1 ),  28 );
+   EXPECT_EQ( outVector_5.getElement( 2 ),  56 );
+   EXPECT_EQ( outVector_5.getElement( 3 ), 102 );
+   EXPECT_EQ( outVector_5.getElement( 4 ),  32 );
+   EXPECT_EQ( outVector_5.getElement( 5 ), 224 );
+   EXPECT_EQ( outVector_5.getElement( 6 ), 352 );
+   EXPECT_EQ( outVector_5.getElement( 7 ), 520 );
+}
+
+template< typename Matrix >
+void test_RowsReduction()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  2  3  0  4  5  0  1 \   6
+    *    |  0  6  0  7  0  0  0  1 |   3
+    *    |  0  8  9  0 10  0  0  1 |   4
+    *    |  0 11 12 13 14  0  0  1 |   5
+    *    |  0 15  0  0  0  0  0  1 |   2
+    *    |  0 16 17 18 19 20 21  1 |   7
+    *    | 22 23 24 25 26 27 28  1 |   8
+    *    \ 29 30 31 32 33 34 35 36 /   8
+    */
+
+   const IndexType rows = 8;
+   const IndexType cols = 8;
+
+   Matrix m;
+   m.setDimensions( rows, cols );
+   typename Matrix::RowsCapacitiesType rowsCapacities{ 6, 3, 4, 5, 2, 7, 8, 8 };
+   m.setCompressedRowLengths( rowsCapacities );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m.setElement( 0, i, value++ );
+
+   m.setElement( 0, 4, value++ );       // 0th row
+   m.setElement( 0, 5, value++ );
+
+   m.setElement( 1, 1, value++ );       // 1st row
+   m.setElement( 1, 3, value++ );
+
+   for( IndexType i = 1; i < 3; i++ )   // 2nd row
+      m.setElement( 2, i, value++ );
+
+   m.setElement( 2, 4, value++ );       // 2nd row
+
+   for( IndexType i = 1; i < 5; i++ )   // 3rd row
+      m.setElement( 3, i, value++ );
+
+   m.setElement( 4, 1, value++ );       // 4th row
+
+   for( IndexType i = 1; i < 7; i++ )   // 5th row
+      m.setElement( 5, i, value++ );
+
+   for( IndexType i = 0; i < 7; i++ )   // 6th row
+      m.setElement( 6, i, value++ );
+
+   for( IndexType i = 0; i < 8; i++ )   // 7th row
+       m.setElement( 7, i, value++ );
+
+   for( IndexType i = 0; i < 7; i++ )   // 1s at the end of rows
+      m.setElement( i, 7, 1);
+
+   ////
+   // Compute number of non-zero elements in rows.
+   typename Matrix::RowsCapacitiesType rowLengths( rows );
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   m.allRowsReduction( fetch, reduce, keep, 0 );
+   EXPECT_EQ( rowsCapacities, rowLengths );
+   m.getCompressedRowLengths( rowLengths );
+   EXPECT_EQ( rowsCapacities, rowLengths );
+
+   ////
+   // Compute max norm
+   TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows );
+   auto rowSums_view = rowSums.getView();
+   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+      return abs( value );
+   };
+   auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowSums_view[ rowIdx ] = value;
+   };
+   m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 );
+   const RealType maxNorm = TNL::max( rowSums );
+   EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36
+}
+
+template< typename Matrix >
+void test_PerformSORIteration()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  4  1  0  0 \
+    *    |  1  4  1  0 |
+    *    |  0  1  4  1 |
+    *    \  0  0  1  4 /
+    */
+
+   const IndexType m_rows = 4;
+   const IndexType m_cols = 4;
+
+   Matrix m( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows );
+   rowLengths = 3;
+   m.setCompressedRowLengths( rowLengths );
+
+   m.setElement( 0, 0, 4.0 );        // 0th row
+   m.setElement( 0, 1, 1.0);
+
+   m.setElement( 1, 0, 1.0 );        // 1st row
+   m.setElement( 1, 1, 4.0 );
+   m.setElement( 1, 2, 1.0 );
+
+   m.setElement( 2, 1, 1.0 );        // 2nd row
+   m.setElement( 2, 2, 4.0 );
+   m.setElement( 2, 3, 1.0 );
+
+   m.setElement( 3, 2, 1.0 );        // 3rd row
+   m.setElement( 3, 3, 4.0 );
+
+   RealType bVector [ 4 ] = { 1, 1, 1, 1 };
+   RealType xVector [ 4 ] = { 1, 1, 1, 1 };
+
+   IndexType row = 0;
+   RealType omega = 1;
+
+   m.performSORIteration( bVector, row++, xVector, omega);
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 1.0 );
+   EXPECT_EQ( xVector[ 2 ], 1.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+   m.performSORIteration( bVector, row++, xVector, omega);
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 1.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+   m.performSORIteration( bVector, row++, xVector, omega);
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 0.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+   m.performSORIteration( bVector, row++, xVector, omega);
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 0.0 );
+   EXPECT_EQ( xVector[ 3 ], 0.25 );
+}
+
+template< typename Matrix >
+void test_SaveAndLoad( const char* filename )
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  0  4  0  5 |
+    *    |  6  7  8  0 |
+    *    \  0  9 10 11 /
+    */
+
+   const IndexType m_rows = 4;
+   const IndexType m_cols = 4;
+
+   Matrix savedMatrix( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows );
+   rowLengths = 3;
+   savedMatrix.setCompressedRowLengths( rowLengths );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
+      savedMatrix.setElement( 0, i, value++ );
+
+   savedMatrix.setElement( 1, 1, value++ );
+   savedMatrix.setElement( 1, 3, value++ );      // 1st row
+
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
+      savedMatrix.setElement( 2, i, value++ );
+
+   for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
+      savedMatrix.setElement( 3, i, value++ );
+
+   ASSERT_NO_THROW( savedMatrix.save( filename ) );
+
+   Matrix loadedMatrix( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths2( m_rows );
+   rowLengths2 = 3;
+   loadedMatrix.setCompressedRowLengths( rowLengths2 );
+
+   ASSERT_NO_THROW( loadedMatrix.load( filename ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  4 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  5 );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  6 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ),  7 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ),  8 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  9 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 );
+
+   EXPECT_EQ( std::remove( filename ), 0 );
+}
+
+template< typename Matrix >
+void test_Print()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 5x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  0  0  0  4 |
+    *    |  5  6  7  0 |
+    *    |  0  8  9 10 |
+    *    \  0  0 11 12 /
+    */
+
+   const IndexType m_rows = 5;
+   const IndexType m_cols = 4;
+
+   Matrix m( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows );
+   rowLengths = 3;
+   m.setCompressedRowLengths( rowLengths );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
+      m.setElement( 0, i, value++ );
+
+   m.setElement( 1, 3, value++ );                // 1st row
+
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
+      m.setElement( 2, i, value++ );
+
+   for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
+      m.setElement( 3, i, value++ );
+
+   for( IndexType i = 2; i < m_cols; i++ )       // 4th row
+      m.setElement( 4, i, value++ );
+
+   std::stringstream printed;
+   std::stringstream couted;
+
+   //change the underlying buffer and save the old buffer
+   auto old_buf = std::cout.rdbuf(printed.rdbuf());
+
+   m.print( std::cout ); //all the std::cout goes to ss
+
+   std::cout.rdbuf(old_buf); //reset
+
+   couted << "Row: 0 ->  Col:0->1	 Col:1->2	 Col:2->3\t\n"
+             "Row: 1 ->  Col:3->4\t\n"
+             "Row: 2 ->  Col:0->5	 Col:1->6	 Col:2->7\t\n"
+             "Row: 3 ->  Col:1->8	 Col:2->9	 Col:3->10\t\n"
+             "Row: 4 ->  Col:2->11	 Col:3->12\t\n";
+
+   EXPECT_EQ( printed.str(), couted.str() );
+}
+
+#endif
-- 
GitLab


From 49f09e5ff82a44bde3644a11cc4de41a83f95151 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sun, 9 Feb 2020 19:01:14 +0100
Subject: [PATCH 138/179] Implementing symmetric sparse matrix unit tests.

---
 .../Matrices/SymmetricSparseMatrixTest.h      | 325 ++++++------------
 1 file changed, 106 insertions(+), 219 deletions(-)

diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
index fd6bd8464..1beddaf7e 100644
--- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
@@ -612,74 +612,58 @@ void test_AddElement()
    EXPECT_EQ( m.getElement( 5, 2 ),  0 );
    EXPECT_EQ( m.getElement( 5, 3 ),  0 );
    EXPECT_EQ( m.getElement( 5, 4 ), 10 );
-############################################################################
+
    // Add new elements to the old elements with a multiplying factor applied to the old elements.
    /*
     * The following setup results in the following 6x5 sparse matrix:
     *
-    *    /  3  6  9  0  0 \
-    *    |  0 12 15 18  0 |
-    *    |  0  0 21 24 27 |
-    *    | 30 11 12  0  0 |
-    *    |  0 35 14 15  0 |
-    *    \  0  0 16 41 18 /
+    *    /  1  2  0  0  0 \   /  0  1  0  0  0 \   /  2  5  0  0  0 \
+    *    |  2  3  4  0  0 |   |  1  0  1  0  0 |   |  5  6  9  0  0 |
+    * 2  |  0  4  5  6  0 | + |  0  1  0  1  0 | = |  0  9 10 13  0 |
+    *    |  0  0  6  7  8 |   |  0  0  1  0  1 |   |  0  0 13 14 17 |
+    *    |  0  0  0  8  9 |   |  0  0  0  1  0 |   |  0  0  0 17 18 |
+    *    \  0  0  0  0 10 /   \  0  0  0  0  1 /   \  0  0  0  0 21 /
     */
 
-   RealType newValue = 1;
-   for( IndexType i = 0; i < cols - 2; i++ )         // 0th row
-      m.addElement( 0, i, newValue++, 2.0 );
-
-   for( IndexType i = 1; i < cols - 1; i++ )         // 1st row
-      m.addElement( 1, i, newValue++, 2.0 );
-
-   for( IndexType i = 2; i < cols; i++ )             // 2nd row
-      m.addElement( 2, i, newValue++, 2.0 );
-
-   for( IndexType i = 0; i < cols - 2; i++ )         // 3rd row
-      m.addElement( 3, i, newValue++, 2.0 );
+   for( IndexType i = 1; i < rows; i++ )
+      m.addElement( i, i - 1, 1.0, 2.0 );
 
-   for( IndexType i = 1; i < cols - 1; i++ )         // 4th row
-      m.addElement( 4, i, newValue++, 2.0 );
 
-   for( IndexType i = 2; i < cols; i++ )             // 5th row
-      m.addElement( 5, i, newValue++, 2.0 );
-
-
-   EXPECT_EQ( m.getElement( 0, 0 ),  3 );
-   EXPECT_EQ( m.getElement( 0, 1 ),  6 );
-   EXPECT_EQ( m.getElement( 0, 2 ),  9 );
+   EXPECT_EQ( m.getElement( 0, 0 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  5 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
    EXPECT_EQ( m.getElement( 0, 4 ),  0 );
 
-   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 1 ), 12 );
-   EXPECT_EQ( m.getElement( 1, 2 ), 15 );
-   EXPECT_EQ( m.getElement( 1, 3 ), 18 );
+   EXPECT_EQ( m.getElement( 1, 0 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  9 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
 
    EXPECT_EQ( m.getElement( 2, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 2 ), 21 );
-   EXPECT_EQ( m.getElement( 2, 3 ), 24 );
-   EXPECT_EQ( m.getElement( 2, 4 ), 27 );
-
-   EXPECT_EQ( m.getElement( 3, 0 ), 30 );
-   EXPECT_EQ( m.getElement( 3, 1 ), 11 );
-   EXPECT_EQ( m.getElement( 3, 2 ), 12 );
-   EXPECT_EQ( m.getElement( 3, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  9 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 13 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 14 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 17 );
 
    EXPECT_EQ( m.getElement( 4, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 1 ), 35 );
-   EXPECT_EQ( m.getElement( 4, 2 ), 14 );
-   EXPECT_EQ( m.getElement( 4, 3 ), 15 );
-   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 17 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 18 );
 
    EXPECT_EQ( m.getElement( 5, 0 ),  0 );
    EXPECT_EQ( m.getElement( 5, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 2 ), 16 );
-   EXPECT_EQ( m.getElement( 5, 3 ), 41 );
-   EXPECT_EQ( m.getElement( 5, 4 ), 18 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 21 );
 }
 
 template< typename Matrix >
@@ -690,226 +674,129 @@ void test_VectorProduct()
    using IndexType = typename Matrix::IndexType;
    using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
 
-   /*
+   /**
     * Sets up the following 4x4 sparse matrix:
     *
     *    /  1  0  0  0 \
-    *    |  0  2  0  3 |
-    *    |  0  4  0  0 |
-    *    \  0  0  5  0 /
+    *    |  0  2  3  4 |
+    *    |  0  3  0  5 |
+    *    \  0  4  5  0 /
     */
 
    const IndexType m_rows_1 = 4;
    const IndexType m_cols_1 = 4;
 
-   Matrix m_1;
-   m_1.reset();
-   m_1.setDimensions( m_rows_1, m_cols_1 );
-   typename Matrix::CompressedRowLengthsVector rowLengths_1;
-   rowLengths_1.setSize( m_rows_1 );
-   rowLengths_1.setElement( 0, 1 );
-   rowLengths_1.setElement( 1, 2 );
-   rowLengths_1.setElement( 2, 1 );
-   rowLengths_1.setElement( 3, 1 );
-   m_1.setCompressedRowLengths( rowLengths_1 );
-
-   RealType value_1 = 1;
-   m_1.setElement( 0, 0, value_1++ );      // 0th row
-
-   m_1.setElement( 1, 1, value_1++ );      // 1st row
-   m_1.setElement( 1, 3, value_1++ );
-
-   m_1.setElement( 2, 1, value_1++ );      // 2nd row
-
-   m_1.setElement( 3, 2, value_1++ );      // 3rd row
-
-   VectorType inVector_1;
-   inVector_1.setSize( m_cols_1 );
-   for( IndexType i = 0; i < inVector_1.getSize(); i++ )
-       inVector_1.setElement( i, 2 );
-
-   VectorType outVector_1;
-   outVector_1.setSize( m_rows_1 );
-   for( IndexType j = 0; j < outVector_1.getSize(); j++ )
-       outVector_1.setElement( j, 0 );
-
+   Matrix m_1( m_rows_1, m_cols_1, {
+      { 0, 0, 1 },
+                   { 1, 1, 2 },
+                   { 2, 1, 3 },
+                   { 3, 1, 4 }, { 3, 2, 5 } } );
 
+   VectorType inVector_1( m_cols, 2 );
+   VectorType outVector_1( m_rows, 1 );
    m_1.vectorProduct( inVector_1, outVector_1 );
 
-
    EXPECT_EQ( outVector_1.getElement( 0 ),  2 );
-   EXPECT_EQ( outVector_1.getElement( 1 ), 10 );
-   EXPECT_EQ( outVector_1.getElement( 2 ),  8 );
-   EXPECT_EQ( outVector_1.getElement( 3 ), 10 );
+   EXPECT_EQ( outVector_1.getElement( 1 ), 18 );
+   EXPECT_EQ( outVector_1.getElement( 2 ), 16 );
+   EXPECT_EQ( outVector_1.getElement( 3 ), 18 );
 
-   /*
+   /**
     * Sets up the following 4x4 sparse matrix:
     *
     *    /  1  2  3  0 \
-    *    |  0  0  0  4 |
-    *    |  5  6  7  0 |
-    *    \  0  8  0  0 /
+    *    |  2  0  6  8 |
+    *    |  3  6  7  0 |
+    *    \  0  8  0  9 /
     */
 
    const IndexType m_rows_2 = 4;
    const IndexType m_cols_2 = 4;
 
-   Matrix m_2( m_rows_2, m_cols_2 );
-   typename Matrix::CompressedRowLengthsVector rowLengths_2{ 3, 1, 3, 1 };
-   /*rowLengths_2 = 3;
-   rowLengths_2.setElement( 1, 1 );
-   rowLengths_2.setElement( 3, 1 );*/
-   m_2.setCompressedRowLengths( rowLengths_2 );
-
-   RealType value_2 = 1;
-   for( IndexType i = 0; i < 3; i++ )   // 0th row
-      m_2.setElement( 0, i, value_2++ );
-
-   m_2.setElement( 1, 3, value_2++ );      // 1st row
-
-   for( IndexType i = 0; i < 3; i++ )   // 2nd row
-      m_2.setElement( 2, i, value_2++ );
-
-   for( IndexType i = 1; i < 2; i++ )       // 3rd row
-      m_2.setElement( 3, i, value_2++ );
-
-   VectorType inVector_2;
-   inVector_2.setSize( m_cols_2 );
-   for( IndexType i = 0; i < inVector_2.getSize(); i++ )
-      inVector_2.setElement( i, 2 );
-
-   VectorType outVector_2;
-   outVector_2.setSize( m_rows_2 );
-   for( IndexType j = 0; j < outVector_2.getSize(); j++ )
-      outVector_2.setElement( j, 0 );
+   Matrix m_2( m_rows_2, m_cols_2, {
+      { 0, 0, 1 }, { 0, 1, 2 }, { 0, 2, 3 },
+      { 1, 0, 2 },              { 1, 2, 6 }, { 1, 3, 8 },
+      { 2, 0, 3 }, { 2, 1, 6 }, { 2, 2, 7 },
+                   { 3, 2, 8 },              { 3, 3, 9 } } );
 
+   VectorType inVector_2( m_cols_2, 2 );
+   VectorType outVector_2( m_rows_2, 0 );
    m_2.vectorProduct( inVector_2, outVector_2 );
 
    EXPECT_EQ( outVector_2.getElement( 0 ), 12 );
-   EXPECT_EQ( outVector_2.getElement( 1 ),  8 );
-   EXPECT_EQ( outVector_2.getElement( 2 ), 36 );
-   EXPECT_EQ( outVector_2.getElement( 3 ), 16 );
+   EXPECT_EQ( outVector_2.getElement( 1 ), 32 );
+   EXPECT_EQ( outVector_2.getElement( 2 ), 32 );
+   EXPECT_EQ( outVector_2.getElement( 3 ), 34 );
 
    /*
     * Sets up the following 4x4 sparse matrix:
     *
-    *    /  1  2  3  0 \
-    *    |  0  4  5  6 |
-    *    |  7  8  9  0 |
-    *    \  0 10 11 12 /
+    *    /  1  2  3  4 \
+    *    |  2  5  0  0 |
+    *    |  3  0  6  0 |
+    *    \  4  0  0  7 /
     */
 
    const IndexType m_rows_3 = 4;
    const IndexType m_cols_3 = 4;
 
-   Matrix m_3( m_rows_3, m_cols_3 );
-   typename Matrix::CompressedRowLengthsVector rowLengths_3{ 3, 3, 3, 3 };
-   m_3.setCompressedRowLengths( rowLengths_3 );
-
-   RealType value_3 = 1;
-   for( IndexType i = 0; i < 3; i++ )          // 0th row
-      m_3.setElement( 0, i, value_3++ );
-
-   for( IndexType i = 1; i < 4; i++ )
-      m_3.setElement( 1, i, value_3++ );      // 1st row
-
-   for( IndexType i = 0; i < 3; i++ )          // 2nd row
-      m_3.setElement( 2, i, value_3++ );
-
-   for( IndexType i = 1; i < 4; i++ )          // 3rd row
-      m_3.setElement( 3, i, value_3++ );
-
-   VectorType inVector_3;
-   inVector_3.setSize( m_cols_3 );
-   for( IndexType i = 0; i < inVector_3.getSize(); i++ )
-      inVector_3.setElement( i, 2 );
-
-   VectorType outVector_3;
-   outVector_3.setSize( m_rows_3 );
-   for( IndexType j = 0; j < outVector_3.getSize(); j++ )
-      outVector_3.setElement( j, 0 );
+   Matrix m_3( m_rows_3, m_cols_3, {
+      { 0, 0, 1 }, { 0, 1, 2 }, { 0, 2, 3 }, { 0, 3, 4 },
+      { 1, 0, 2 }, { 1, 1, 5 },
+      { 2, 0, 3 }, { 2, 2, 6 },
+      { 3, 0, 4 }, { 3, 3, 7 }
+   } );
 
+   VectorType inVector_3( { 0, 1, 2, 3 } );
+   VectorType outVector_3( m_rows_3, 0 );
    m_3.vectorProduct( inVector_3, outVector_3 );
 
-   EXPECT_EQ( outVector_3.getElement( 0 ), 12 );
-   EXPECT_EQ( outVector_3.getElement( 1 ), 30 );
-   EXPECT_EQ( outVector_3.getElement( 2 ), 48 );
-   EXPECT_EQ( outVector_3.getElement( 3 ), 66 );
+   EXPECT_EQ( outVector_3.getElement( 0 ), 20 );
+   EXPECT_EQ( outVector_3.getElement( 1 ),  5 );
+   EXPECT_EQ( outVector_3.getElement( 2 ), 12 );
+   EXPECT_EQ( outVector_3.getElement( 3 ), 21 );
 
    /*
     * Sets up the following 8x8 sparse matrix:
     *
-    *    /  1  2  3  0  0  4  0  0 \
-    *    |  0  5  6  7  8  0  0  0 |
-    *    |  9 10 11 12 13  0  0  0 |
-    *    |  0 14 15 16 17  0  0  0 |
-    *    |  0  0 18 19 20 21  0  0 |
-    *    |  0  0  0 22 23 24 25  0 |
-    *    | 26 27 28 29 30  0  0  0 |
-    *    \ 31 32 33 34 35  0  0  0 /
+    *    /  1  0  3  0  9  0 15  0 \
+    *    |  0  2  0  6  0 12  0 19 |
+    *    |  3  0  5  0 10  0 16  0 |
+    *    |  0  6  0  8  0 13  0 20 |
+    *    |  9  0 10  0 11  0 17  0 |
+    *    |  0 12  0 13  0 14  0 21 |
+    *    | 15  0 16  0 17  0 18  0 |
+    *    \  0 19  0 20  0 21  0 22 /
     */
 
    const IndexType m_rows_4 = 8;
    const IndexType m_cols_4 = 8;
 
-   Matrix m_4( m_rows_4, m_cols_4 );
-   typename Matrix::CompressedRowLengthsVector rowLengths_4{ 4, 4, 5, 4, 4, 4, 5, 5 };
-   /*rowLengths_4.setSize( m_rows_4 );
-   rowLengths_4.setValue( 4 );
-   rowLengths_4.setElement( 2, 5 );
-   rowLengths_4.setElement( 6, 5 );
-   rowLengths_4.setElement( 7, 5 );*/
-   m_4.setCompressedRowLengths( rowLengths_4 );
-
-   RealType value_4 = 1;
-   for( IndexType i = 0; i < 3; i++ )       // 0th row
-      m_4.setElement( 0, i, value_4++ );
-
-   m_4.setElement( 0, 5, value_4++ );
-
-   for( IndexType i = 1; i < 5; i++ )       // 1st row
-      m_4.setElement( 1, i, value_4++ );
-
-   for( IndexType i = 0; i < 5; i++ )       // 2nd row
-      m_4.setElement( 2, i, value_4++ );
-
-   for( IndexType i = 1; i < 5; i++ )       // 3rd row
-      m_4.setElement( 3, i, value_4++ );
-
-   for( IndexType i = 2; i < 6; i++ )       // 4th row
-      m_4.setElement( 4, i, value_4++ );
-
-   for( IndexType i = 3; i < 7; i++ )       // 5th row
-      m_4.setElement( 5, i, value_4++ );
-
-   for( IndexType i = 0; i < 5; i++ )       // 6th row
-      m_4.setElement( 6, i, value_4++ );
-
-   for( IndexType i = 0; i < 5; i++ )       // 7th row
-      m_4.setElement( 7, i, value_4++ );
-
-   VectorType inVector_4;
-   inVector_4.setSize( m_cols_4 );
-   for( IndexType i = 0; i < inVector_4.getSize(); i++ )
-      inVector_4.setElement( i, 2 );
-
-   VectorType outVector_4;
-   outVector_4.setSize( m_rows_4 );
-   for( IndexType j = 0; j < outVector_4.getSize(); j++ )
-      outVector_4.setElement( j, 0 );
+   Matrix m_4( m_rows_4, m_cols_4, {
+      { 0, 0,  1 },
+                    { 1, 1,  2 },
+      { 2, 0,  3 },               { 2, 2, 5 },
+                    { 3, 1,  6 },               { 3, 3, 8 },
+      { 4, 0,  9 },               { 4, 2, 10 },               { 4, 4, 11 },
+                    { 5, 1, 12 },               { 5, 3, 13 },               { 5, 5, 14 },
+      { 6, 0, 15 },               { 6, 2, 16 },               { 6, 4, 17 },               { 6, 6, 18 },
+                    { 7, 1, 19 },               { 7, 3, 20 },               { 7, 5, 21 },               { 7, 7, 22 }
+   } );
 
+   VectorType inVector_4 { 1, 2, 1, 2, 1, 2, 1, 2 };
+   VectorType outVector_4( m_rows_4, 0 );
    m_4.vectorProduct( inVector_4, outVector_4 );
 
-   EXPECT_EQ( outVector_4.getElement( 0 ),  20 );
-   EXPECT_EQ( outVector_4.getElement( 1 ),  52 );
-   EXPECT_EQ( outVector_4.getElement( 2 ), 110 );
-   EXPECT_EQ( outVector_4.getElement( 3 ), 124 );
-   EXPECT_EQ( outVector_4.getElement( 4 ), 156 );
-   EXPECT_EQ( outVector_4.getElement( 5 ), 188 );
-   EXPECT_EQ( outVector_4.getElement( 6 ), 280 );
-   EXPECT_EQ( outVector_4.getElement( 7 ), 330 );
-
+   EXPECT_EQ( outVector_4.getElement( 0 ),  28 );
+   EXPECT_EQ( outVector_4.getElement( 1 ),  78 );
+   EXPECT_EQ( outVector_4.getElement( 2 ),  34 );
+   EXPECT_EQ( outVector_4.getElement( 3 ),  94 );
+   EXPECT_EQ( outVector_4.getElement( 4 ),  47 );
+   EXPECT_EQ( outVector_4.getElement( 5 ), 120 );
+   EXPECT_EQ( outVector_4.getElement( 6 ),  66 );
+   EXPECT_EQ( outVector_4.getElement( 7 ), 164 );
 
+############################################
    /*
     * Sets up the following 8x8 sparse matrix:
     *
-- 
GitLab


From e841b7d7e2c30e3acf3bc87578a1d3370c0cd22c Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 11 Feb 2020 10:58:19 +0100
Subject: [PATCH 139/179] Fixed save and load in sparse and dense matrix.

---
 src/TNL/Matrices/Dense.hpp                |  2 ++
 src/TNL/Matrices/DenseMatrixView.hpp      |  1 +
 src/TNL/Matrices/SparseMatrix.hpp         |  1 +
 src/UnitTests/Matrices/DenseMatrixTest.h  |  2 +-
 src/UnitTests/Matrices/SparseMatrixTest.h | 10 ++++------
 5 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 91a98e7f9..0d7037b1f 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -1128,6 +1128,8 @@ template< typename Real,
 void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::load( File& file )
 {
    Matrix< Real, Device, Index >::load( file );
+   this->segments.load( file );
+   this->view = this->getView();
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp
index a11ff263c..00ca5edc2 100644
--- a/src/TNL/Matrices/DenseMatrixView.hpp
+++ b/src/TNL/Matrices/DenseMatrixView.hpp
@@ -920,6 +920,7 @@ template< typename Real,
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const
 {
    MatrixView< Real, Device, Index >::save( file );
+   this->segments.save( file );
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index df841230c..03273c98b 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -858,6 +858,7 @@ load( File& file )
    Matrix< RealType, DeviceType, IndexType >::load( file );
    file >> this->columnIndexes;
    this->segments.load( file );
+   this->view = this->getView();
 }
 
 template< typename Real,
diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index a3e7e8f61..37ae58bf1 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -1234,7 +1234,7 @@ void test_SaveAndLoad()
 
     ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) );
 
-    Matrix loadedMatrix( rows, cols );
+    Matrix loadedMatrix;
 
     ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) );
 
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h
index 72eb0b33b..45dc40578 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest.h
@@ -48,17 +48,17 @@ void test_Constructors()
    EXPECT_EQ( m1.getColumns(), 6 );
 
    Matrix m2( {1, 2, 2, 2, 1 }, 5 );
-   typename Matrix::RowsCapacitiesType v1, v2{ 1, 2, 2, 2, 1 }; 
+   typename Matrix::RowsCapacitiesType v1, v2{ 1, 2, 2, 2, 1 };
    m2.setElement( 0, 0, 1 );   // 0th row
    m2.setElement( 1, 0, 1 );   // 1st row
-   m2.setElement( 1, 1, 1 );   
+   m2.setElement( 1, 1, 1 );
    m2.setElement( 2, 1, 1 );   // 2nd row
    m2.setElement( 2, 2, 1 );
    m2.setElement( 3, 2, 1 );   // 3rd row
    m2.setElement( 3, 3, 1 );
    m2.setElement( 4, 4, 1 );   // 4th row
    m2.getCompressedRowLengths( v1 );
-   
+
    EXPECT_EQ( v1, v2 );
 
    /*
@@ -1297,9 +1297,7 @@ void test_SaveAndLoad( const char* filename )
 
    ASSERT_NO_THROW( savedMatrix.save( filename ) );
 
-   Matrix loadedMatrix( m_rows, m_cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths2( m_rows, 3 );
-   loadedMatrix.setCompressedRowLengths( rowLengths2 );
+   Matrix loadedMatrix;
 
    ASSERT_NO_THROW( loadedMatrix.load( filename ) );
 
-- 
GitLab


From 058f8ae9654fe8ccf3783cd1976a1a4c5bb95aa3 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 11 Feb 2020 10:59:04 +0100
Subject: [PATCH 140/179] Implementing unit tests for symmetric sparse matrix.

---
 .../Matrices/SymmetricSparseMatrixTest.h      | 351 ++++++++----------
 1 file changed, 148 insertions(+), 203 deletions(-)

diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
index 1beddaf7e..8593ff3f1 100644
--- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
@@ -569,7 +569,7 @@ void test_AddElement()
    const IndexType cols = 5;
 
    Matrix m( 6, 5, {
-      { 0, 0, 1 }, 
+      { 0, 0, 1 },
       { 1, 0, 2 }, { 1, 1, 3 },
                    { 2, 1, 4 }, { 2, 2, 5 },
                                 { 3, 2, 6 }, { 3, 3, 7 },
@@ -796,79 +796,45 @@ void test_VectorProduct()
    EXPECT_EQ( outVector_4.getElement( 6 ),  66 );
    EXPECT_EQ( outVector_4.getElement( 7 ), 164 );
 
-############################################
    /*
     * Sets up the following 8x8 sparse matrix:
     *
-    *    /  1  2  3  0  4  5  0  1 \   6
-    *    |  0  6  0  7  0  0  0  1 |   3
-    *    |  0  8  9  0 10  0  0  1 |   4
-    *    |  0 11 12 13 14  0  0  1 |   5
-    *    |  0 15  0  0  0  0  0  1 |   2
-    *    |  0 16 17 18 19 20 21  1 |   7
-    *    | 22 23 24 25 26 27 28  1 |   8
-    *    \ 29 30 31 32 33 34 35 36 /   8
+    *    /  1  0  0  0  0  0  0  0 \
+    *    |  0  2  0  0  0  0  0  0 |
+    *    |  0  0  3  4  6  9  0  0 |
+    *    |  0  0  4  5  7 10  0  0 |
+    *    |  0  0  6  7  8 11  0  0 |
+    *    |  0  0  9 10 11 12  0  0 |
+    *    |  0  0  0  0  0  0 13  0 |
+    *    \  0  0  0  0  0  0  0 14 /
     */
 
    const IndexType m_rows_5 = 8;
    const IndexType m_cols_5 = 8;
 
-   Matrix m_5( m_rows_5, m_cols_5 );
-   typename Matrix::CompressedRowLengthsVector rowLengths_5{ 6, 3, 4, 5, 2, 7, 8, 8 };
-   m_5.setCompressedRowLengths( rowLengths_5 );
-
-   RealType value_5 = 1;
-   for( IndexType i = 0; i < 3; i++ )   // 0th row
-      m_5.setElement( 0, i, value_5++ );
-
-   m_5.setElement( 0, 4, value_5++ );           // 0th row
-   m_5.setElement( 0, 5, value_5++ );
-
-   m_5.setElement( 1, 1, value_5++ );           // 1st row
-   m_5.setElement( 1, 3, value_5++ );
-
-   for( IndexType i = 1; i < 3; i++ )            // 2nd row
-      m_5.setElement( 2, i, value_5++ );
-
-   m_5.setElement( 2, 4, value_5++ );           // 2nd row
-
-   for( IndexType i = 1; i < 5; i++ )            // 3rd row
-      m_5.setElement( 3, i, value_5++ );
-
-   m_5.setElement( 4, 1, value_5++ );           // 4th row
-
-   for( IndexType i = 1; i < 7; i++ )            // 5th row
-      m_5.setElement( 5, i, value_5++ );
-
-   for( IndexType i = 0; i < 7; i++ )            // 6th row
-      m_5.setElement( 6, i, value_5++ );
-
-   for( IndexType i = 0; i < 8; i++ )            // 7th row
-      m_5.setElement( 7, i, value_5++ );
-
-   for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
-      m_5.setElement( i, 7, 1);
-
-   VectorType inVector_5;
-   inVector_5.setSize( m_cols_5 );
-   for( IndexType i = 0; i < inVector_5.getSize(); i++ )
-       inVector_5.setElement( i, 2 );
-
-   VectorType outVector_5;
-   outVector_5.setSize( m_rows_5 );
-   for( IndexType j = 0; j < outVector_5.getSize(); j++ )
-       outVector_5.setElement( j, 0 );
+   Matrix m_5( m_rows_5, m_cols_5,{
+      { 0, 0, 1 },
+                   { 1, 1, 2, },
+                                 { 2, 2, 3 }, { 2, 3,  4 }, { 2, 4,  6 }, { 2, 5,  9 },
+                                 { 3, 2, 4 }, { 3, 3,  5 }, { 3, 4,  7 }, { 3, 5, 10 },
+                                 { 4, 2, 6 }, { 4, 3,  7 }, { 4, 4,  8 }, { 4, 5, 11 },
+                                 { 5, 2, 9 }, { 5, 3, 10 }, { 5, 4, 11 }, { 5, 5, 12 },
+                                                                                        { 6, 6, 13 },
+                                                                                                      { 7, 7, 14 }
+   } );
 
+   VectorType inVector_5( m_cols_5, { 1, 2, 3, 4, 5, 6, 7, 8 } );
+   VectorType outVector_5( m_rows_5, 0.0 );
    m_5.vectorProduct( inVector_5, outVector_5 );
 
-   EXPECT_EQ( outVector_5.getElement( 0 ),  32 );
-   EXPECT_EQ( outVector_5.getElement( 1 ),  28 );
-   EXPECT_EQ( outVector_5.getElement( 2 ),  56 );
-   EXPECT_EQ( outVector_5.getElement( 3 ), 102 );
-   EXPECT_EQ( outVector_5.getElement( 4 ),  32 );
-   EXPECT_EQ( outVector_5.getElement( 5 ), 224 );
-   EXPECT_EQ( outVector_5.getElement( 6 ), 352 );
-   EXPECT_EQ( outVector_5.getElement( 7 ), 520 );
+   EXPECT_EQ( outVector_5.getElement( 0 ), 1*1 );
+   EXPECT_EQ( outVector_5.getElement( 1 ), 2*2 );
+   EXPECT_EQ( outVector_5.getElement( 2 ), 3*3 + 4*4  + 5*6  + 6*9 );
+   EXPECT_EQ( outVector_5.getElement( 3 ), 3*4 + 4*5  + 5*7  + 6*10 );
+   EXPECT_EQ( outVector_5.getElement( 4 ), 3*6 + 4*7  + 5*8  + 6*11 );
+   EXPECT_EQ( outVector_5.getElement( 5 ), 3*9 + 4*10 + 5*11 + 6*12 );
+   EXPECT_EQ( outVector_5.getElement( 6 ), 7*13 );
+   EXPECT_EQ( outVector_5.getElement( 7 ), 8*14 );
 }
 
 template< typename Matrix >
@@ -881,55 +847,29 @@ void test_RowsReduction()
    /*
     * Sets up the following 8x8 sparse matrix:
     *
-    *    /  1  2  3  0  4  5  0  1 \   6
-    *    |  0  6  0  7  0  0  0  1 |   3
-    *    |  0  8  9  0 10  0  0  1 |   4
-    *    |  0 11 12 13 14  0  0  1 |   5
-    *    |  0 15  0  0  0  0  0  1 |   2
-    *    |  0 16 17 18 19 20 21  1 |   7
-    *    | 22 23 24 25 26 27 28  1 |   8
-    *    \ 29 30 31 32 33 34 35 36 /   8
+    *    /  1  0  0  0  0  0  0  0 \
+    *    |  0  2  0  0  0  0  0  0 |
+    *    |  0  0  3  4  6  9  0  0 |
+    *    |  0  0  4  5  7 10  0  0 |
+    *    |  0  0  6  7  8 11  0  0 |
+    *    |  0  0  9 10 11 12  0  0 |
+    *    |  0  0  0  0  0  0 13  0 |
+    *    \  0  0  0  0  0  0  0 14 /
     */
 
-   const IndexType rows = 8;
-   const IndexType cols = 8;
-
-   Matrix m;
-   m.setDimensions( rows, cols );
-   typename Matrix::RowsCapacitiesType rowsCapacities{ 6, 3, 4, 5, 2, 7, 8, 8 };
-   m.setCompressedRowLengths( rowsCapacities );
-
-   RealType value = 1;
-   for( IndexType i = 0; i < 3; i++ )   // 0th row
-      m.setElement( 0, i, value++ );
-
-   m.setElement( 0, 4, value++ );       // 0th row
-   m.setElement( 0, 5, value++ );
-
-   m.setElement( 1, 1, value++ );       // 1st row
-   m.setElement( 1, 3, value++ );
-
-   for( IndexType i = 1; i < 3; i++ )   // 2nd row
-      m.setElement( 2, i, value++ );
-
-   m.setElement( 2, 4, value++ );       // 2nd row
-
-   for( IndexType i = 1; i < 5; i++ )   // 3rd row
-      m.setElement( 3, i, value++ );
-
-   m.setElement( 4, 1, value++ );       // 4th row
-
-   for( IndexType i = 1; i < 7; i++ )   // 5th row
-      m.setElement( 5, i, value++ );
-
-   for( IndexType i = 0; i < 7; i++ )   // 6th row
-      m.setElement( 6, i, value++ );
-
-   for( IndexType i = 0; i < 8; i++ )   // 7th row
-       m.setElement( 7, i, value++ );
+   const IndexType m_rows_5 = 8;
+   const IndexType m_cols_5 = 8;
 
-   for( IndexType i = 0; i < 7; i++ )   // 1s at the end of rows
-      m.setElement( i, 7, 1);
+   Matrix m_5( m_rows_5, m_cols_5,{
+      { 0, 0, 1 },
+                   { 1, 1, 2, },
+                                 { 2, 2, 3 }, { 2, 3,  4 }, { 2, 4,  6 }, { 2, 5,  9 },
+                                 { 3, 2, 4 }, { 3, 3,  5 }, { 3, 4,  7 }, { 3, 5, 10 },
+                                 { 4, 2, 6 }, { 4, 3,  7 }, { 4, 4,  8 }, { 4, 5, 11 },
+                                 { 5, 2, 9 }, { 5, 3, 10 }, { 5, 4, 11 }, { 5, 5, 12 },
+                                                                                        { 6, 6, 13 },
+                                                                                                      { 7, 7, 14 }
+   } );
 
    ////
    // Compute number of non-zero elements in rows.
@@ -986,24 +926,13 @@ void test_PerformSORIteration()
    const IndexType m_rows = 4;
    const IndexType m_cols = 4;
 
-   Matrix m( m_rows, m_cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows );
-   rowLengths = 3;
-   m.setCompressedRowLengths( rowLengths );
-
-   m.setElement( 0, 0, 4.0 );        // 0th row
-   m.setElement( 0, 1, 1.0);
-
-   m.setElement( 1, 0, 1.0 );        // 1st row
-   m.setElement( 1, 1, 4.0 );
-   m.setElement( 1, 2, 1.0 );
-
-   m.setElement( 2, 1, 1.0 );        // 2nd row
-   m.setElement( 2, 2, 4.0 );
-   m.setElement( 2, 3, 1.0 );
-
-   m.setElement( 3, 2, 1.0 );        // 3rd row
-   m.setElement( 3, 3, 4.0 );
+   Matrix m( m_rows, m_cols, {
+      { 0, 0, 4 }, { 0, 1, 1 },
+      { 1, 0, 1 }, { 1, 1, 4 }, { 1, 2, 1 },
+                   { 2, 1, 1 }, { 2, 2, 4 }, { 2, 3, 1 },
+                                { 3, 2, 1 }, { 3, 3, 4 }, { 3, 4, 1 },
+                                             { 4, 3, 1 }, { 4, 4, 4 }
+   } );
 
    RealType bVector [ 4 ] = { 1, 1, 1, 1 };
    RealType xVector [ 4 ] = { 1, 1, 1, 1 };
@@ -1048,41 +977,70 @@ void test_SaveAndLoad( const char* filename )
    using IndexType = typename Matrix::IndexType;
 
    /*
-    * Sets up the following 4x4 sparse matrix:
+    * Sets up the following 6x5 sparse matrix:
     *
-    *    /  1  2  3  0 \
-    *    |  0  4  0  5 |
-    *    |  6  7  8  0 |
-    *    \  0  9 10 11 /
+    *    /  1  2  0  0  0 \
+    *    |  2  3  4  0  0 |
+    *    |  0  4  5  6  0 |
+    *    |  0  0  6  7  8 |
+    *    |  0  0  0  8  9 |
+    *    \  0  0  0  0 10 /
     */
 
-   const IndexType m_rows = 4;
-   const IndexType m_cols = 4;
+   const IndexType rows = 6;
+   const IndexType cols = 5;
 
-   Matrix savedMatrix( m_rows, m_cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows );
-   rowLengths = 3;
-   savedMatrix.setCompressedRowLengths( rowLengths );
+   Matrix savedMatrix( 6, 5, {
+      { 0, 0, 1 },
+      { 1, 0, 2 }, { 1, 1, 3 },
+                   { 2, 1, 4 }, { 2, 2, 5 },
+                                { 3, 2, 6 }, { 3, 3, 7 },
+                                             { 4, 3, 8 }, { 4, 4,  9 },
+                                                          { 5, 5, 10 } } );
 
-   RealType value = 1;
-   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
-      savedMatrix.setElement( 0, i, value++ );
+   // Check the set elements
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
 
-   savedMatrix.setElement( 1, 1, value++ );
-   savedMatrix.setElement( 1, 3, value++ );      // 1st row
+   EXPECT_EQ( m.getElement( 1, 0 ),  2 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  3 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
 
-   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
-      savedMatrix.setElement( 2, i, value++ );
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  4 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  5 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
 
-   for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
-      savedMatrix.setElement( 3, i, value++ );
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  6 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  7 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  8 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  8 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  9 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 10 );
 
    ASSERT_NO_THROW( savedMatrix.save( filename ) );
 
-   Matrix loadedMatrix( m_rows, m_cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths2( m_rows );
-   rowLengths2 = 3;
-   loadedMatrix.setCompressedRowLengths( rowLengths2 );
+   Matrix loadedMatrix;
+   //typename Matrix::CompressedRowLengthsVector rowLengths2( m_rows );
+   //rowLengths2 = 3;
+   //loadedMatrix.setCompressedRowLengths( rowLengths2 );
 
    ASSERT_NO_THROW( loadedMatrix.load( filename ) );
 
@@ -1090,42 +1048,43 @@ void test_SaveAndLoad( const char* filename )
    EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
    EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
    EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 4 ), loadedMatrix.getElement( 0, 4 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 5 ), loadedMatrix.getElement( 0, 5 ) );
 
    EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
    EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
    EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
    EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 4 ), loadedMatrix.getElement( 1, 4 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 5 ), loadedMatrix.getElement( 1, 5 ) );
 
    EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
    EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
    EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
    EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 4 ), loadedMatrix.getElement( 2, 4 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 5 ), loadedMatrix.getElement( 2, 5 ) );
 
    EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
    EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
-
-   EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
-   EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
-   EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
-   EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
-
-   EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  0 );
-   EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  4 );
-   EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  0 );
-   EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  5 );
-
-   EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  6 );
-   EXPECT_EQ( savedMatrix.getElement( 2, 1 ),  7 );
-   EXPECT_EQ( savedMatrix.getElement( 2, 2 ),  8 );
-   EXPECT_EQ( savedMatrix.getElement( 2, 3 ),  0 );
-
-   EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
-   EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  9 );
-   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 );
-   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 );
-
+   EXPECT_EQ( savedMatrix.getElement( 3, 4 ), loadedMatrix.getElement( 3, 4 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 5 ), loadedMatrix.getElement( 3, 5 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 4, 0 ), loadedMatrix.getElement( 4, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 4, 1 ), loadedMatrix.getElement( 4, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 4, 2 ), loadedMatrix.getElement( 4, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 4, 3 ), loadedMatrix.getElement( 4, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 4, 4 ), loadedMatrix.getElement( 4, 4 ) );
+   EXPECT_EQ( savedMatrix.getElement( 4, 5 ), loadedMatrix.getElement( 4, 5 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 5, 0 ), loadedMatrix.getElement( 5, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 5, 1 ), loadedMatrix.getElement( 5, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 5, 2 ), loadedMatrix.getElement( 5, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 5, 3 ), loadedMatrix.getElement( 5, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 5, 4 ), loadedMatrix.getElement( 5, 4 ) );
+   EXPECT_EQ( savedMatrix.getElement( 5, 5 ), loadedMatrix.getElement( 5, 5 ) );
    EXPECT_EQ( std::remove( filename ), 0 );
 }
 
@@ -1137,37 +1096,24 @@ void test_Print()
    using IndexType = typename Matrix::IndexType;
 
    /*
-    * Sets up the following 5x4 sparse matrix:
+    * Sets up the following 4x4 sparse matrix:
     *
-    *    /  1  2  3  0 \
-    *    |  0  0  0  4 |
-    *    |  5  6  7  0 |
-    *    |  0  8  9 10 |
-    *    \  0  0 11 12 /
+    *    /  4  1  0  0 \
+    *    |  1  4  1  0 |
+    *    |  0  1  4  1 |
+    *    \  0  0  1  4 /
     */
 
-   const IndexType m_rows = 5;
+   const IndexType m_rows = 4;
    const IndexType m_cols = 4;
 
-   Matrix m( m_rows, m_cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows );
-   rowLengths = 3;
-   m.setCompressedRowLengths( rowLengths );
-
-   RealType value = 1;
-   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
-      m.setElement( 0, i, value++ );
-
-   m.setElement( 1, 3, value++ );                // 1st row
-
-   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
-      m.setElement( 2, i, value++ );
-
-   for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
-      m.setElement( 3, i, value++ );
-
-   for( IndexType i = 2; i < m_cols; i++ )       // 4th row
-      m.setElement( 4, i, value++ );
+   Matrix m( m_rows, m_cols, {
+      { 0, 0, 4 }, { 0, 1, 1 },
+      { 1, 0, 1 }, { 1, 1, 4 }, { 1, 2, 1 },
+                   { 2, 1, 1 }, { 2, 2, 4 }, { 2, 3, 1 },
+                                { 3, 2, 1 }, { 3, 3, 4 }, { 3, 4, 1 },
+                                             { 4, 3, 1 }, { 4, 4, 4 }
+   } );
 
    std::stringstream printed;
    std::stringstream couted;
@@ -1179,11 +1125,10 @@ void test_Print()
 
    std::cout.rdbuf(old_buf); //reset
 
-   couted << "Row: 0 ->  Col:0->1	 Col:1->2	 Col:2->3\t\n"
-             "Row: 1 ->  Col:3->4\t\n"
-             "Row: 2 ->  Col:0->5	 Col:1->6	 Col:2->7\t\n"
-             "Row: 3 ->  Col:1->8	 Col:2->9	 Col:3->10\t\n"
-             "Row: 4 ->  Col:2->11	 Col:3->12\t\n";
+   couted << "Row: 0 ->  Col:0->4	 Col:1->1\t\n"
+             "Row: 1 ->  Col:0->1	 Col:1->4	 Col:2->1\t\n"
+             "Row: 2 ->  Col:1->1	 Col:2->4	 Col:3->1\t\n"
+             "Row: 3 ->  Col:2->1	 Col:3->4\t\n";
 
    EXPECT_EQ( printed.str(), couted.str() );
 }
-- 
GitLab


From c5c27c5d7f4207afbea7726662f61d6ad7af97fa Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 11 Feb 2020 11:11:57 +0100
Subject: [PATCH 141/179] Fixing binary sparse matrix unit tests.

---
 .../Matrices/BinarySparseMatrixTest.hpp       | 20 +++----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
index 8b3d8f833..276c432ff 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
@@ -1078,9 +1078,7 @@ void test_SaveAndLoad( const char* filename )
    const IndexType m_cols = 4;
 
    Matrix savedMatrix( m_rows, m_cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths;
-   rowLengths.setSize( m_rows );
-   rowLengths.setValue( 3 );
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );
    savedMatrix.setCompressedRowLengths( rowLengths );
 
    for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
@@ -1098,14 +1096,6 @@ void test_SaveAndLoad( const char* filename )
    ASSERT_NO_THROW( savedMatrix.save( filename ) );
 
    Matrix loadedMatrix;
-   loadedMatrix.reset();
-   loadedMatrix.setDimensions( m_rows, m_cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths2;
-   rowLengths2.setSize( m_rows );
-   rowLengths2.setValue( 3 );
-   loadedMatrix.setCompressedRowLengths( rowLengths2 );
-
-
    ASSERT_NO_THROW( loadedMatrix.load( filename ) );
 
 
@@ -1172,12 +1162,8 @@ void test_Print()
    const IndexType m_rows = 5;
    const IndexType m_cols = 4;
 
-   Matrix m;
-   m.reset();
-   m.setDimensions( m_rows, m_cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths;
-   rowLengths.setSize( m_rows );
-   rowLengths.setValue( 3 );
+   Matrix m( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );
    m.setCompressedRowLengths( rowLengths );
 
    RealType value = 1;
-- 
GitLab


From 95966e9cb67f9558409db4ff145c0b5d4d90c0d4 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 11 Feb 2020 11:25:33 +0100
Subject: [PATCH 142/179] Adding copyright header to sparse matric unit tests
 source files.

---
 src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp        | 10 ++++++++++
 src/UnitTests/Matrices/SparseMatrixTest_CSR.cu         | 10 ++++++++++
 src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp    | 10 ++++++++++
 src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu     | 10 ++++++++++
 .../Matrices/SparseMatrixTest_SlicedEllpack.cpp        | 10 ++++++++++
 .../Matrices/SparseMatrixTest_SlicedEllpack.cu         | 10 ++++++++++
 ...parseMatrixTest.h => SymmetricSparseMatrixTest.hpp} |  0
 7 files changed, 60 insertions(+)
 rename src/UnitTests/Matrices/{SymmetricSparseMatrixTest.h => SymmetricSparseMatrixTest.hpp} (100%)

diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp
index 258ad2c53..5830658ab 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp
@@ -1 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest_CSR.cpp -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
 #include "SparseMatrixTest_CSR.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu
index 258ad2c53..91f0de81a 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu
@@ -1 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest_CSR.cu -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
 #include "SparseMatrixTest_CSR.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp
index c454706f0..3c30c54c5 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp
@@ -1 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest_Ellpack.cpp -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
 #include "SparseMatrixTest_Ellpack.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu
index c454706f0..9a27cece6 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu
@@ -1 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest_Ellpack.cu -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
 #include "SparseMatrixTest_Ellpack.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp
index 40e2e94b8..2c79ee502 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp
@@ -1 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest_SlicedEllpack.cpp -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
 #include "SparseMatrixTest_SlicedEllpack.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu
index 40e2e94b8..bff81d9a3 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu
@@ -1 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest_SlicedEllpack.cu -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
 #include "SparseMatrixTest_SlicedEllpack.h"
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
similarity index 100%
rename from src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
rename to src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
-- 
GitLab


From db8ab1ee0491535ac3e13322d33b4d2e84e7c4f3 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 11 Feb 2020 11:28:37 +0100
Subject: [PATCH 143/179] Added symmetric sparse matrix unit tests for CSR
 format.

---
 src/UnitTests/Matrices/CMakeLists.txt         |   8 ++
 .../Matrices/SymmetricSparseMatrixTest.h      | 114 ++++++++++++++++++
 .../SymmetricSparseMatrixTest_CSR.cpp         |  11 ++
 .../Matrices/SymmetricSparseMatrixTest_CSR.cu |  11 ++
 .../Matrices/SymmetricSparseMatrixTest_CSR.h  |  61 ++++++++++
 5 files changed, 205 insertions(+)
 create mode 100644 src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
 create mode 100644 src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cpp
 create mode 100644 src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cu
 create mode 100644 src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h

diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index 8da67ef6b..1c536a982 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -37,6 +37,9 @@ IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( BinarySparseMatrixCopyTest BinarySparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( BinarySparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
 
+   CUDA_ADD_EXECUTABLE( SymmetricSparseMatrixTest_CSR SymmetricSparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SymmetricSparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} )
+
 ELSE(  BUILD_CUDA )
    ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp )
    TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
@@ -86,6 +89,10 @@ ELSE(  BUILD_CUDA )
    TARGET_COMPILE_OPTIONS( BinarySparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( BinarySparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
 
+   ADD_EXECUTABLE( SymmetricSparseMatrixTest_CSR SymmetricSparseMatrixTest_CSR.cpp )
+   TARGET_COMPILE_OPTIONS( SymmetricSparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SymmetricSparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} )
+
 ENDIF( BUILD_CUDA )
 
 ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
@@ -100,6 +107,7 @@ ADD_TEST( BinarySparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatri
 ADD_TEST( BinarySparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( BinarySparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( BinarySparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( SymmetricSparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SymmetricSparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} )
 
 if( ${BUILD_MPI} )
    if( BUILD_CUDA )
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
new file mode 100644
index 000000000..659f555c3
--- /dev/null
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
@@ -0,0 +1,114 @@
+/***************************************************************************
+                          SymmetricSparseMatrixTest.h -  description
+                             -------------------
+    begin                : Feb 11, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+#include <iostream>
+#include <TNL/Matrices/SparseMatrix.h>
+#include "SparseMatrixTest.hpp"
+
+// test fixture for typed tests
+template< typename Matrix >
+class MatrixTest : public ::testing::Test
+{
+protected:
+   using MatrixType = Matrix;
+};
+
+TYPED_TEST_SUITE( MatrixTest, MatrixTypes);
+
+TYPED_TEST( MatrixTest, Constructors )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_Constructors< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setDimensionsTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetDimensions< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setCompressedRowLengthsTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetCompressedRowLengths< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setLikeTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetLike< MatrixType, MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, resetTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_Reset< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, getRowTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetRow< MatrixType >();
+}
+
+
+TYPED_TEST( MatrixTest, setElementTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetElement< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, addElementTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AddElement< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, vectorProductTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_VectorProduct< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, rowsReduction )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_RowsReduction< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, saveAndLoadTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SaveAndLoad< MatrixType >( saveAndLoadTestFileName );
+}
+
+TYPED_TEST( MatrixTest, printTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_Print< MatrixType >();
+}
+
+#endif
+
+#include "../main.h"
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cpp b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cpp
new file mode 100644
index 000000000..c23fa4242
--- /dev/null
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          SymmetricSparseMatrixTest_CSR.cpp -  description
+                             -------------------
+    begin                : Feb 11, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "SymmetricSparseMatrixTest_CSR.h"
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cu b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cu
new file mode 100644
index 000000000..df1d83da0
--- /dev/null
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          SymmetricSparseMatrixTest_CSR.cu -  description
+                             -------------------
+    begin                : Feb 11, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "SymmetricSparseMatrixTest_CSR.h"
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h
new file mode 100644
index 000000000..bbb6c66cb
--- /dev/null
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h
@@ -0,0 +1,61 @@
+/***************************************************************************
+                          SymmetricSparseMatrixTest_CSR.h -  description
+                             -------------------
+    begin                : Feb 11, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Matrices/SparseMatrix.h>
+
+// test fixture for typed tests
+template< typename Matrix >
+class MatrixTest : public ::testing::Test
+{
+protected:
+   using MatrixType = Matrix;
+};
+
+// types for which MatrixTest is instantiated
+using MatrixTypes = ::testing::Types
+<
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
+#ifdef HAVE_CUDA
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
+#endif // HAVE_CUDA
+>;
+
+const char* saveAndLoadTestFileName "test_SymmetricSparseMatrixTest_CSR_segments";
+
+#include "SparseMatrixTest.h"
+
+#endif // HAVE_GTEST
+
+#include "../main.h"
-- 
GitLab


From 6f43c59aac4b6dcdb4cdb6894b775c2ed24a9f40 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 12 Feb 2020 13:19:34 +0100
Subject: [PATCH 144/179] Fixing symmetric sparse matrix unit tests.

---
 .../Matrices/SymmetricSparseMatrixTest.h      | 11 +--
 .../Matrices/SymmetricSparseMatrixTest.hpp    | 98 ++++++++++---------
 .../Matrices/SymmetricSparseMatrixTest_CSR.h  | 16 +--
 3 files changed, 59 insertions(+), 66 deletions(-)

diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
index 659f555c3..5582b138d 100644
--- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
@@ -12,7 +12,7 @@
 #include <gtest/gtest.h>
 #include <iostream>
 #include <TNL/Matrices/SparseMatrix.h>
-#include "SparseMatrixTest.hpp"
+#include "SymmetricSparseMatrixTest.hpp"
 
 // test fixture for typed tests
 template< typename Matrix >
@@ -24,13 +24,6 @@ protected:
 
 TYPED_TEST_SUITE( MatrixTest, MatrixTypes);
 
-TYPED_TEST( MatrixTest, Constructors )
-{
-    using MatrixType = typename TestFixture::MatrixType;
-
-    test_Constructors< MatrixType >();
-}
-
 TYPED_TEST( MatrixTest, setDimensionsTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
@@ -110,5 +103,3 @@ TYPED_TEST( MatrixTest, printTest )
 }
 
 #endif
-
-#include "../main.h"
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
index 8593ff3f1..75b121060 100644
--- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
@@ -177,13 +177,13 @@ void test_GetNumberOfNonzeroMatrixElements()
       { 0, 0,  1 },
       { 1, 0,  2 }, { 1, 1,  3 },
       { 2, 0,  4 }, { 2, 1,  5 }, {  2, 2,  6 },
-      { 3, 0,  7 }, { 3, 1,  8 },              , { 3, 3,  9 },
+      { 3, 0,  7 }, { 3, 1,  8 },                { 3, 3,  9 },
                     { 4, 1, 10 }, {  4, 2, 11 },               { 4, 4, 12 },
                     { 5, 1, 13 }, {  5, 2, 14 },                              {  5, 5, 15 },
                     { 6, 1, 16 },                { 6, 3, 17 },                              { 6, 6, 18 },
                     { 7, 1, 19 },                { 7, 3, 20 },                                            { 7, 7, 21 },
                                   {  8, 2, 22 },               { 8, 4, 23 },                                           { 8, 8, 24 },
-                                  {  9, 2, 25 },               { 9, 4, 26 },                                                         { 9, 9, 27 }
+                                  {  9, 2, 25 },               { 9, 4, 26 },                                                         { 9, 9, 27 },
                                   { 10, 2, 28 },                              { 10, 4, 29 },                                                      { 10, 10, 30 }
    } );
 
@@ -260,7 +260,7 @@ void test_GetRow()
          case 10: row.setElement( 0, 2, 28 ); row.setElement( 1, 5, 29 ); row.setElement( 2, 10, 30 ); break;
       }
    };
-   TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f );
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, m.getRows(), f );
 
    EXPECT_EQ( m.getElement( 0,  0 ),  1 );
    EXPECT_EQ( m.getElement( 0,  1 ),  2 );
@@ -692,8 +692,8 @@ void test_VectorProduct()
                    { 2, 1, 3 },
                    { 3, 1, 4 }, { 3, 2, 5 } } );
 
-   VectorType inVector_1( m_cols, 2 );
-   VectorType outVector_1( m_rows, 1 );
+   VectorType inVector_1( m_cols_1, 2.0 );
+   VectorType outVector_1( m_rows_1, 0.0 );
    m_1.vectorProduct( inVector_1, outVector_1 );
 
    EXPECT_EQ( outVector_1.getElement( 0 ),  2 );
@@ -823,7 +823,7 @@ void test_VectorProduct()
                                                                                                       { 7, 7, 14 }
    } );
 
-   VectorType inVector_5( m_cols_5, { 1, 2, 3, 4, 5, 6, 7, 8 } );
+   VectorType inVector_5( { 1, 2, 3, 4, 5, 6, 7, 8 } );
    VectorType outVector_5( m_rows_5, 0.0 );
    m_5.vectorProduct( inVector_5, outVector_5 );
 
@@ -873,7 +873,8 @@ void test_RowsReduction()
 
    ////
    // Compute number of non-zero elements in rows.
-   typename Matrix::RowsCapacitiesType rowLengths( rows );
+   typename Matrix::RowsCapacitiesType rowLengths( m_rows_5 );
+   typename Matrix::RowsCapacitiesType rowLengths_true( { 1, 1, 4, 4, 4, 4, 1, 1 } );
    auto rowLengths_view = rowLengths.getView();
    auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
       return ( value != 0.0 );
@@ -884,14 +885,15 @@ void test_RowsReduction()
    auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
       rowLengths_view[ rowIdx ] = value;
    };
-   m.allRowsReduction( fetch, reduce, keep, 0 );
-   EXPECT_EQ( rowsCapacities, rowLengths );
-   m.getCompressedRowLengths( rowLengths );
-   EXPECT_EQ( rowsCapacities, rowLengths );
+   m_5.allRowsReduction( fetch, reduce, keep, 0 );
+
+   EXPECT_EQ( rowLengths_true, rowLengths );
+   m_5.getCompressedRowLengths( rowLengths );
+   EXPECT_EQ( rowLengths_true, rowLengths );
 
    ////
    // Compute max norm
-   TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows );
+   TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( m_5.getRows() );
    auto rowSums_view = rowSums.getView();
    auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
       return abs( value );
@@ -902,7 +904,7 @@ void test_RowsReduction()
    auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
       rowSums_view[ rowIdx ] = value;
    };
-   m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 );
+   m_5.allRowsReduction( max_fetch, max_reduce, max_keep, 0 );
    const RealType maxNorm = TNL::max( rowSums );
    EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36
 }
@@ -999,41 +1001,41 @@ void test_SaveAndLoad( const char* filename )
                                                           { 5, 5, 10 } } );
 
    // Check the set elements
-   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
-   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
-   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
-
-   EXPECT_EQ( m.getElement( 1, 0 ),  2 );
-   EXPECT_EQ( m.getElement( 1, 1 ),  3 );
-   EXPECT_EQ( m.getElement( 1, 2 ),  4 );
-   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
-
-   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 1 ),  4 );
-   EXPECT_EQ( m.getElement( 2, 2 ),  5 );
-   EXPECT_EQ( m.getElement( 2, 3 ),  6 );
-   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
-
-   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 2 ),  6 );
-   EXPECT_EQ( m.getElement( 3, 3 ),  7 );
-   EXPECT_EQ( m.getElement( 3, 4 ),  8 );
-
-   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 3 ),  8 );
-   EXPECT_EQ( m.getElement( 4, 4 ),  9 );
-
-   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 4 ), 10 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  2 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  3 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  4 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ),  4 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ),  5 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ),  6 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ),  6 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ),  7 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 4 ),  8 );
+
+   EXPECT_EQ( savedMatrix.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 4, 3 ),  8 );
+   EXPECT_EQ( savedMatrix.getElement( 4, 4 ),  9 );
+
+   EXPECT_EQ( savedMatrix.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 5, 4 ), 10 );
 
    ASSERT_NO_THROW( savedMatrix.save( filename ) );
 
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h
index bbb6c66cb..45dd5e5b9 100644
--- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h
@@ -14,12 +14,12 @@
 #include <TNL/Matrices/SparseMatrix.h>
 
 // test fixture for typed tests
-template< typename Matrix >
-class MatrixTest : public ::testing::Test
-{
-protected:
-   using MatrixType = Matrix;
-};
+//template< typename Matrix >
+//class MatrixTest : public ::testing::Test
+//{
+//protected:
+//   using MatrixType = Matrix;
+//};
 
 // types for which MatrixTest is instantiated
 using MatrixTypes = ::testing::Types
@@ -52,9 +52,9 @@ using MatrixTypes = ::testing::Types
 #endif // HAVE_CUDA
 >;
 
-const char* saveAndLoadTestFileName "test_SymmetricSparseMatrixTest_CSR_segments";
+const char* saveAndLoadTestFileName = "test_SymmetricSparseMatrixTest_CSR_segments";
 
-#include "SparseMatrixTest.h"
+#include "SymmetricSparseMatrixTest.h"
 
 #endif // HAVE_GTEST
 
-- 
GitLab


From 537a8805d134831e757cb919a37e21d9883cf057 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Thu, 13 Feb 2020 18:35:22 +0100
Subject: [PATCH 145/179] Implementintg symmetric sparse matrix.

---
 src/TNL/Matrices/SparseMatrix.hpp             |  17 +++
 src/TNL/Matrices/SparseMatrixView.h           |   4 +-
 src/TNL/Matrices/SparseMatrixView.hpp         | 101 ++++++++++++++----
 .../Matrices/SymmetricSparseMatrixTest.h      |   7 ++
 .../Matrices/SymmetricSparseMatrixTest.hpp    |  37 +++++--
 5 files changed, 132 insertions(+), 34 deletions(-)

diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 03273c98b..3eccc7211 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -11,6 +11,7 @@
 #pragma once
 
 #include <functional>
+#include <sstream>
 #include <TNL/Algorithms/Reduction.h>
 #include <TNL/Matrices/SparseMatrix.h>
 
@@ -107,11 +108,27 @@ SparseMatrix( const IndexType rows,
 {
    Containers::Vector< IndexType, Devices::Host, IndexType > rowCapacities( rows, 0 );
    for( const auto& i : data )
+   {
+      if( std::get< 0 >( i ) >= rows )
+      {
+         std::stringstream s;
+         s << "Wrong row index " << std::get< 0 >( i ) << " in an initializer list";
+         throw std::logic_error( s.str() );
+      }
       rowCapacities[ std::get< 0 >( i ) ]++;
+   }
    SparseMatrix< Real, Devices::Host, Index, MatrixType, Segments > hostMatrix( rows, columns );
    hostMatrix.setCompressedRowLengths( rowCapacities );
    for( const auto& i : data )
+   {
+      if( std::get< 1 >( i ) >= columns )
+      {
+         std::stringstream s;
+         s << "Wrong column index " << std::get< 1 >( i ) << " in an initializer list";
+         throw std::logic_error( s.str() );
+      }
       hostMatrix.setElement( std::get< 0 >( i ), std::get< 1 >( i ), std::get< 2 >( i ) );
+   }
    ( *this ) = hostMatrix;
 }
 
diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h
index 2756c80d7..54d4f1766 100644
--- a/src/TNL/Matrices/SparseMatrixView.h
+++ b/src/TNL/Matrices/SparseMatrixView.h
@@ -98,8 +98,8 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
                        const IndexType column,
                        const RealType& value );
 
-      void addElement( const IndexType row,
-                       const IndexType column,
+      void addElement( IndexType row,
+                       IndexType column,
                        const RealType& value,
                        const RealType& thisElementMultiplicator = 1.0 );
 
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index 16a8bc62f..62300217f 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -153,10 +153,36 @@ getNumberOfNonzeroMatrixElements() const
 {
    const auto columns_view = this->columnIndexes.getConstView();
    const IndexType paddingIndex = this->getPaddingIndex();
-   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
-      return ( columns_view[ i ] != paddingIndex );
-   };
-   return Algorithms::Reduction< DeviceType >::reduce( this->columnIndexes.getSize(), std::plus<>{}, fetch, 0 );
+   if( ! isSymmetric() )
+   {
+      auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+         return ( columns_view[ i ] != paddingIndex );
+      };
+      return Algorithms::Reduction< DeviceType >::reduce( this->columnIndexes.getSize(), std::plus<>{}, fetch, 0 );
+   }
+   else
+   {
+      const auto rows = this->getRows();
+      const auto columns = this->getColumns();
+      Containers::Vector< IndexType, DeviceType, IndexType > row_sums( this->getRows(), 0 );
+      auto row_sums_view = row_sums.getView();
+      const auto columnIndexesView = this->columnIndexes.getConstView();
+      auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> RealType {
+         const IndexType column = columnIndexesView[ globalIdx ];
+         compute = ( column != paddingIndex );
+         if( ! compute )
+            return 0.0;
+         return 1 + ( column != row && column < rows && row < columns ); // the addition is for non-diagonal elements
+      };
+      auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+         sum += value;
+      };
+      auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+         row_sums_view[ row ] = value;
+      };
+      this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
+      return sum( row_sums );
+   }
 }
 
 template< typename Real,
@@ -206,8 +232,8 @@ template< typename Real,
           template< typename, typename > class SegmentsView >
 void
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-addElement( const IndexType row,
-            const IndexType column,
+addElement( IndexType row,
+            IndexType column,
             const RealType& value,
             const RealType& thisElementMultiplicator )
 {
@@ -216,6 +242,13 @@ addElement( const IndexType row,
    TNL_ASSERT_GE( column, 0, "Sparse matrix column index cannot be negative." );
    TNL_ASSERT_LT( column, this->getColumns(), "Sparse matrix column index is larger than number of matrix columns." );
 
+   if( isSymmetric() && row < column )
+   {
+      swap( row, column );
+      TNL_ASSERT_LT( row, this->getRows(), "Column index is out of the symmetric part of the matrix after transposition." );
+      TNL_ASSERT_LT( column,this->getColumns(), "Row index is out of the symmetric part of the matrix after transposition." );
+   }
+
    const IndexType rowSize = this->segments.getSegmentSize( row );
    IndexType col( this->getPaddingIndex() );
    IndexType i;
@@ -276,14 +309,21 @@ template< typename Real,
           template< typename, typename > class SegmentsView >
 Real
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-getElement( const IndexType row,
-            const IndexType column ) const
+getElement( IndexType row,
+            IndexType column ) const
 {
    TNL_ASSERT_GE( row, 0, "Sparse matrix row index cannot be negative." );
    TNL_ASSERT_LT( row, this->getRows(), "Sparse matrix row index is larger than number of matrix rows." );
    TNL_ASSERT_GE( column, 0, "Sparse matrix column index cannot be negative." );
    TNL_ASSERT_LT( column, this->getColumns(), "Sparse matrix column index is larger than number of matrix columns." );
 
+   if( isSymmetric() && row < column )
+   {
+      swap( row, column );
+      if( row >= this->getRows() || column >= this->getColumns() )
+         return 0.0;
+   }
+
    const IndexType rowSize = this->segments.getSegmentSize( row );
    for( IndexType i = 0; i < rowSize; i++ )
    {
@@ -588,25 +628,40 @@ void
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
 print( std::ostream& str ) const
 {
-   for( IndexType row = 0; row < this->getRows(); row++ )
+   if( isSymmetric() )
    {
-      str <<"Row: " << row << " -> ";
-      const IndexType rowLength = this->segments.getSegmentSize( row );
-      for( IndexType i = 0; i < rowLength; i++ )
+      for( IndexType row = 0; row < this->getRows(); row++ )
       {
-         const IndexType globalIdx = this->segments.getGlobalIndex( row, i );
-         const IndexType column = this->columnIndexes.getElement( globalIdx );
-         if( column == this->getPaddingIndex() )
-            break;
-         RealType value;
-         if( isBinary() )
-            value = 1.0;
-         else
-            value = this->values.getElement( globalIdx );
-         str << " Col:" << column << "->" << value << "\t";
+         str <<"Row: " << row << " -> ";
+         for( IndexType column = 0; column < this->getColumns(); column++ )
+         {
+            auto value = this->getElement( row, column );
+            if( value )
+               str << " Col:" << column << "->" << value << "\t";
+         }
+         str << std::endl;
       }
-      str << std::endl;
    }
+   else
+      for( IndexType row = 0; row < this->getRows(); row++ )
+      {
+         str <<"Row: " << row << " -> ";
+         const auto rowLength = this->segments.getSegmentSize( row );
+         for( IndexType i = 0; i < rowLength; i++ )
+         {
+            const IndexType globalIdx = this->segments.getGlobalIndex( row, i );
+            const IndexType column = this->columnIndexes.getElement( globalIdx );
+            if( column == this->getPaddingIndex() )
+               break;
+            RealType value;
+            if( isBinary() )
+               value = ( RealType ) 1.0;
+            else
+               value = this->values.getElement( globalIdx );
+            str << " Col:" << column << "->" << value << "\t";
+         }
+         str << std::endl;
+      }
 }
 
 template< typename Real,
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
index 5582b138d..02fd8c585 100644
--- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
@@ -45,6 +45,13 @@ TYPED_TEST( MatrixTest, setLikeTest )
     test_SetLike< MatrixType, MatrixType >();
 }
 
+TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElements )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetNumberOfNonzeroMatrixElements< MatrixType >();
+}
+
 TYPED_TEST( MatrixTest, resetTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
index 75b121060..7c8278422 100644
--- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
@@ -73,11 +73,11 @@ void test_SetCompressedRowLengths()
     |       24    25             26     |
     \       27       28             30  /
     */
-   const IndexType rows = 10;
+   const IndexType rows = 11;
    const IndexType cols = 11;
 
    Matrix m( rows, cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths { 1, 2, 3, 3, 3, 3, 3, 3, 3, 3  };
+   typename Matrix::CompressedRowLengthsVector rowLengths { 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3  };
    m.setCompressedRowLengths( rowLengths );
 
    // Insert values into the rows.
@@ -120,9 +120,25 @@ void test_SetCompressedRowLengths()
    m.setElement( 7, 3, value++ );
    m.setElement( 7, 7, value++ );
 
+   // 8th row - lower part
+   m.setElement( 8, 2, value++ );
+   m.setElement( 8, 4, value++ );
+   m.setElement( 8, 8, value++ );
+
+   // 8th row - lower part
+   m.setElement( 9, 2, value++ );
+   m.setElement( 9, 4, value++ );
+   m.setElement( 9, 9, value++ );
+
+   // 8th row - lower part
+   m.setElement( 10,  2, value++ );
+   m.setElement( 10,  5, value++ );
+   m.setElement( 10, 10, value++ );
+
    rowLengths = 0;
    m.getCompressedRowLengths( rowLengths );
-   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 1, 2, 3, 3, 3, 3, 3, 3, 3, 3 };
+
+   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3 };
    EXPECT_EQ( rowLengths, correctRowLengths );
 }
 
@@ -170,8 +186,8 @@ void test_GetNumberOfNonzeroMatrixElements()
                                             49
     */
 
-   const IndexType rows = 10;
-   const IndexType cols = 10;
+   const IndexType rows = 11;
+   const IndexType cols = 11;
 
    Matrix m( rows, cols, {
       { 0, 0,  1 },
@@ -276,7 +292,7 @@ void test_GetRow()
 
    EXPECT_EQ( m.getElement( 1,  0 ),  2 );
    EXPECT_EQ( m.getElement( 1,  1 ),  3 );
-   EXPECT_EQ( m.getElement( 1,  2 ),  4 );
+   EXPECT_EQ( m.getElement( 1,  2 ),  5 );
    EXPECT_EQ( m.getElement( 1,  3 ),  8 );
    EXPECT_EQ( m.getElement( 1,  4 ), 10 );
    EXPECT_EQ( m.getElement( 1,  5 ), 13 );
@@ -421,7 +437,7 @@ void test_SetElement()
    Matrix m( { 1, 1, 1, 4, 1, 1, 7, 1, 1, 1 }, 10 );
 
    RealType value = 1;
-   for( IndexType i = 0; i < 4; i++ )
+   for( IndexType i = 0; i < 3; i++ )
       m.setElement( i, i, value++ );
 
    for( IndexType i = 0; i < 4; i++ )
@@ -574,7 +590,7 @@ void test_AddElement()
                    { 2, 1, 4 }, { 2, 2, 5 },
                                 { 3, 2, 6 }, { 3, 3, 7 },
                                              { 4, 3, 8 }, { 4, 4,  9 },
-                                                          { 5, 5, 10 } } );
+                                                          { 5, 4, 10 } } );
 
    // Check the set elements
    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
@@ -626,9 +642,12 @@ void test_AddElement()
     */
 
    for( IndexType i = 1; i < rows; i++ )
+   {
       m.addElement( i, i - 1, 1.0, 2.0 );
+      m.addElement( i, i, 0.0, 2.0 );
+   }
 
-
+   std::cerr << m << std::endl;
    EXPECT_EQ( m.getElement( 0, 0 ),  2 );
    EXPECT_EQ( m.getElement( 0, 1 ),  5 );
    EXPECT_EQ( m.getElement( 0, 2 ),  0 );
-- 
GitLab


From 4e62a8fff15e08c8935f1269bc755f381468c907 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 24 Feb 2020 21:25:07 +0100
Subject: [PATCH 146/179] Fixing symmetric sparse matrix.

---
 src/TNL/Matrices/SparseMatrix.h                |  2 +-
 src/TNL/Matrices/SparseMatrix.hpp              |  4 ++--
 src/TNL/Matrices/SparseMatrixView.h            |  4 ++--
 src/TNL/Matrices/SparseMatrixView.hpp          | 18 +++++++++++++++---
 .../Matrices/SymmetricSparseMatrixTest.hpp     |  9 +++++----
 5 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 573d382ce..81422eef3 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -145,7 +145,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       void vectorProduct( const InVector& inVector,
                           OutVector& outVector,
                           const RealType& matrixMultiplicator = 1.0,
-                          const RealType& inVectorAddition = 0.0 ) const;
+                          const RealType& outVectorMultiplicator = 0.0 ) const;
 
       /*template< typename Real2, typename Index2 >
       void addMatrix( const SparseMatrix< Real2, Segments, Device, Index2 >& matrix,
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 3eccc7211..22ca56940 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -396,9 +396,9 @@ SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAll
 vectorProduct( const InVector& inVector,
                OutVector& outVector,
                const RealType& matrixMultiplicator,
-               const RealType& inVectorAddition ) const
+               const RealType& outVectorMultiplicator ) const
 {
-   this->view.vectorProduct( inVector, outVector, matrixMultiplicator, inVectorAddition );
+   this->view.vectorProduct( inVector, outVector, matrixMultiplicator, outVectorMultiplicator );
    /*TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." );
    TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." );
 
diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h
index 54d4f1766..a611b5467 100644
--- a/src/TNL/Matrices/SparseMatrixView.h
+++ b/src/TNL/Matrices/SparseMatrixView.h
@@ -118,8 +118,8 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
                 typename OutVector >
       void vectorProduct( const InVector& inVector,
                           OutVector& outVector,
-                          const RealType& matrixMultiplicator = 1.0,
-                          const RealType& inVectorAddition = 0.0 ) const;
+                          const RealType matrixMultiplicator = 1.0,
+                          const RealType outVectorMultiplicator = 0.0 ) const;
 
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
       void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index 62300217f..87f4f1038 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -13,6 +13,7 @@
 #include <functional>
 #include <TNL/Matrices/SparseMatrixView.h>
 #include <TNL/Algorithms/Reduction.h>
+#include <TNL/Atomic.h>
 
 namespace TNL {
 namespace Matrices {
@@ -367,8 +368,8 @@ void
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
 vectorProduct( const InVector& inVector,
                OutVector& outVector,
-               const RealType& matrixMultiplicator,
-               const RealType& inVectorAddition ) const
+               const RealType matrixMultiplicator,
+               const RealType outVectorMultiplicator ) const
 {
    TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." );
    TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." );
@@ -378,11 +379,19 @@ vectorProduct( const InVector& inVector,
    const auto valuesView = this->values.getConstView();
    const auto columnIndexesView = this->columnIndexes.getConstView();
    const IndexType paddingIndex = this->getPaddingIndex();
+   if( isSymmetric() )
+      outVector *= outVectorMultiplicator;
    auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> RealType {
       const IndexType column = columnIndexesView[ globalIdx ];
       compute = ( column != paddingIndex );
       if( ! compute )
          return 0.0;
+      if( isSymmetric() )
+      {
+         TNL_ASSERT_TRUE( false, "" );
+         //Atomic< RealType, DeviceType > atomic;
+         //if( isBinary() )
+      }
       if( isBinary() )
          return inVectorView[ column ];
       return valuesView[ globalIdx ] * inVectorView[ column ];
@@ -391,7 +400,10 @@ vectorProduct( const InVector& inVector,
       sum += value;
    };
    auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
-      outVectorView[ row ] = value;
+      if( outVectorMultiplicator == 0.0 )
+         outVectorView[ row ] = matrixMultiplicator * value;
+      else
+         outVectorView[ row ] = outVectorMultiplicator * outVectorView[ row ] + matrixMultiplicator * value;
    };
    this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
 
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
index 7c8278422..193c1e031 100644
--- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
@@ -641,13 +641,14 @@ void test_AddElement()
     *    \  0  0  0  0 10 /   \  0  0  0  0  1 /   \  0  0  0  0 21 /
     */
 
-   for( IndexType i = 1; i < rows; i++ )
+   for( IndexType i = 0; i < rows; i++ )
    {
-      m.addElement( i, i - 1, 1.0, 2.0 );
-      m.addElement( i, i, 0.0, 2.0 );
+      if( i > 0 )
+         m.addElement( i, i - 1, 1.0, 2.0 );
+      if( i < cols )
+         m.addElement( i, i, 0.0, 2.0 );
    }
 
-   std::cerr << m << std::endl;
    EXPECT_EQ( m.getElement( 0, 0 ),  2 );
    EXPECT_EQ( m.getElement( 0, 1 ),  5 );
    EXPECT_EQ( m.getElement( 0, 2 ),  0 );
-- 
GitLab


From 335dd0fc17225ea55af0d25a22f8b605fdaede58 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 24 Feb 2020 21:32:38 +0100
Subject: [PATCH 147/179] Fixing sparse matrix to work with StaticVector as
 RealType.

---
 src/TNL/Matrices/SparseMatrixView.hpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index 87f4f1038..cb6afd8e4 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -168,20 +168,20 @@ getNumberOfNonzeroMatrixElements() const
       Containers::Vector< IndexType, DeviceType, IndexType > row_sums( this->getRows(), 0 );
       auto row_sums_view = row_sums.getView();
       const auto columnIndexesView = this->columnIndexes.getConstView();
-      auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> RealType {
+      auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> IndexType {
          const IndexType column = columnIndexesView[ globalIdx ];
          compute = ( column != paddingIndex );
          if( ! compute )
             return 0.0;
          return 1 + ( column != row && column < rows && row < columns ); // the addition is for non-diagonal elements
       };
-      auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+      auto reduction = [] __cuda_callable__ ( IndexType& sum, const IndexType& value ) {
          sum += value;
       };
-      auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      auto keeper = [=] __cuda_callable__ ( IndexType row, const IndexType& value ) mutable {
          row_sums_view[ row ] = value;
       };
-      this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
+      this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( IndexType ) 0 );
       return sum( row_sums );
    }
 }
@@ -648,7 +648,7 @@ print( std::ostream& str ) const
          for( IndexType column = 0; column < this->getColumns(); column++ )
          {
             auto value = this->getElement( row, column );
-            if( value )
+            if( value != ( RealType ) 0 )
                str << " Col:" << column << "->" << value << "\t";
          }
          str << std::endl;
-- 
GitLab


From 46ee01f39ef067724c2fb6c79e7662295f0570af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Tue, 25 Feb 2020 22:04:13 +0100
Subject: [PATCH 148/179] Fixes of SegmentsViewType and SegmentViewType.

---
 src/TNL/Containers/Segments/Ellpack.h           | 2 +-
 src/TNL/Containers/Segments/EllpackView.h       | 2 +-
 src/TNL/Containers/Segments/SlicedEllpack.h     | 2 +-
 src/TNL/Containers/Segments/SlicedEllpackView.h | 2 +-
 src/TNL/Matrices/SparseMatrix.h                 | 1 +
 src/TNL/Matrices/SparseMatrixView.h             | 5 ++---
 src/TNL/Matrices/SparseMatrixView.hpp           | 1 +
 7 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h
index c197c7010..6edacb1cf 100644
--- a/src/TNL/Containers/Segments/Ellpack.h
+++ b/src/TNL/Containers/Segments/Ellpack.h
@@ -34,7 +34,7 @@ class Ellpack
       using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
       using SegmentsSizes = OffsetsHolder;
       template< typename Device_, typename Index_ >
-      using ViewTemplate = EllpackView< Device_, Index_ >;
+      using ViewTemplate = EllpackView< Device_, Index_, RowMajorOrder, Alignment >;
       using ViewType = EllpackView< Device, Index, RowMajorOrder, Alignment >;
       //using ConstViewType = EllpackView< Device, std::add_const_t< Index >, RowMajorOrder, Alignment >;
       using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h
index 3870f0802..dcbc56d1b 100644
--- a/src/TNL/Containers/Segments/EllpackView.h
+++ b/src/TNL/Containers/Segments/EllpackView.h
@@ -35,7 +35,7 @@ class EllpackView
       using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
       using SegmentsSizes = OffsetsHolder;
       template< typename Device_, typename Index_ >
-      using ViewTemplate = EllpackView< Device_, Index_ >;
+      using ViewTemplate = EllpackView< Device_, Index_, RowMajorOrder, Alignment >;
       using ViewType = EllpackView;
       using ConstViewType = EllpackView< Device, std::add_const_t< Index > >;
       using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h
index 5953cde36..e1cdfa1d4 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.h
+++ b/src/TNL/Containers/Segments/SlicedEllpack.h
@@ -35,7 +35,7 @@ class SlicedEllpack
       static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
       using ViewType = SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >;
       template< typename Device_, typename Index_ >
-      using ViewTemplate = SlicedEllpackView< Device_, Index_ >;
+      using ViewTemplate = SlicedEllpackView< Device_, Index_, RowMajorOrder, SliceSize >;
       using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >;
       using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
 
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h
index 2b310a805..23001553c 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.h
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.h
@@ -33,7 +33,7 @@ class SlicedEllpackView
       static constexpr int getSliceSize() { return SliceSize; }
       static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
       template< typename Device_, typename Index_ >
-      using ViewTemplate = SlicedEllpackView< Device_, Index_ >;
+      using ViewTemplate = SlicedEllpackView< Device_, Index_, RowMajorOrder, SliceSize >;
       using ViewType = SlicedEllpackView;
       using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index > >;
       using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 81422eef3..e31d9d6ba 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -40,6 +40,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       using SegmentsType = Segments< Device, Index, IndexAllocator >;
       template< typename Device_, typename Index_ >
       using SegmentsViewTemplate = typename SegmentsType::template ViewTemplate< Device_, Index >;
+      using SegmentsViewType = typename SegmentsType::ViewType;
       using SegmentViewType = typename SegmentsType::SegmentViewType;
       using DeviceType = Device;
       using IndexType = Index;
diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h
index a611b5467..4fa65b70a 100644
--- a/src/TNL/Matrices/SparseMatrixView.h
+++ b/src/TNL/Matrices/SparseMatrixView.h
@@ -51,7 +51,6 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
       typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
       typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
 
-
       __cuda_callable__
       SparseMatrixView();
 
@@ -103,8 +102,8 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
                        const RealType& value,
                        const RealType& thisElementMultiplicator = 1.0 );
 
-      RealType getElement( const IndexType row,
-                           const IndexType column ) const;
+      RealType getElement( IndexType row,
+                           IndexType column ) const;
 
       template< typename Vector >
       __cuda_callable__
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index cb6afd8e4..4e5244806 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -209,6 +209,7 @@ SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
 getRow( const IndexType& rowIdx ) -> RowView
 {
    TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." );
+   typename RowView::SegmentViewType t = this->segments.getSegmentView( rowIdx );
    return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() );
 }
 
-- 
GitLab


From 91b56fdcca41f3532f31812c801cdc00ea6678ce Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 26 Feb 2020 13:50:34 +0100
Subject: [PATCH 149/179] Added a method SparseMatrix::setElements.

---
 src/TNL/Matrices/SparseMatrix.h   |  2 +
 src/TNL/Matrices/SparseMatrix.hpp | 64 +++++++++++++++++++------------
 2 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index e31d9d6ba..c7f953a8c 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -102,6 +102,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
          this->setCompressedRowLengths( rowLengths );
       };
 
+      void setElements( const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data );
+
       template< typename Vector >
       void getCompressedRowLengths( Vector& rowLengths ) const;
 
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 22ca56940..4c1f3b1ce 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -106,30 +106,7 @@ SparseMatrix( const IndexType rows,
               const IndexAllocatorType& indexAllocator )
 : BaseType( rows, columns, realAllocator ), columnIndexes( indexAllocator )
 {
-   Containers::Vector< IndexType, Devices::Host, IndexType > rowCapacities( rows, 0 );
-   for( const auto& i : data )
-   {
-      if( std::get< 0 >( i ) >= rows )
-      {
-         std::stringstream s;
-         s << "Wrong row index " << std::get< 0 >( i ) << " in an initializer list";
-         throw std::logic_error( s.str() );
-      }
-      rowCapacities[ std::get< 0 >( i ) ]++;
-   }
-   SparseMatrix< Real, Devices::Host, Index, MatrixType, Segments > hostMatrix( rows, columns );
-   hostMatrix.setCompressedRowLengths( rowCapacities );
-   for( const auto& i : data )
-   {
-      if( std::get< 1 >( i ) >= columns )
-      {
-         std::stringstream s;
-         s << "Wrong column index " << std::get< 1 >( i ) << " in an initializer list";
-         throw std::logic_error( s.str() );
-      }
-      hostMatrix.setElement( std::get< 0 >( i ), std::get< 1 >( i ), std::get< 2 >( i ) );
-   }
-   ( *this ) = hostMatrix;
+   this->setElements( data );
 }
 
 template< typename Real,
@@ -231,6 +208,45 @@ setCompressedRowLengths( const RowsCapacitiesVector& rowsCapacities )
    this->view = this->getView();
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+setElements( const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data )
+{
+   const auto& rows = this->getRows();
+   const auto& columns = this->getColumns();
+   Containers::Vector< IndexType, Devices::Host, IndexType > rowCapacities( rows, 0 );
+   for( const auto& i : data )
+   {
+      if( std::get< 0 >( i ) >= rows )
+      {
+         std::stringstream s;
+         s << "Wrong row index " << std::get< 0 >( i ) << " in an initializer list";
+         throw std::logic_error( s.str() );
+      }
+      rowCapacities[ std::get< 0 >( i ) ]++;
+   }
+   SparseMatrix< Real, Devices::Host, Index, MatrixType, Segments > hostMatrix( rows, columns );
+   hostMatrix.setCompressedRowLengths( rowCapacities );
+   for( const auto& i : data )
+   {
+      if( std::get< 1 >( i ) >= columns )
+      {
+         std::stringstream s;
+         s << "Wrong column index " << std::get< 1 >( i ) << " in an initializer list";
+         throw std::logic_error( s.str() );
+      }
+      hostMatrix.setElement( std::get< 0 >( i ), std::get< 1 >( i ), std::get< 2 >( i ) );
+   }
+   ( *this ) = hostMatrix;
+}
+
 template< typename Real,
           typename Device,
           typename Index,
-- 
GitLab


From 036d57b0de44f95b44acf3876b80e5d40367c247 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 26 Feb 2020 13:51:03 +0100
Subject: [PATCH 150/179] Fixed SparseMatrix::addElement unit test.

---
 src/UnitTests/Matrices/SparseMatrixTest.h | 42 +++++++++++------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h
index 45dc40578..30d3a692d 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest.h
@@ -663,9 +663,9 @@ void test_AddElement()
     *    /  1  2  3  0  0 \
     *    |  0  4  5  6  0 |
     *    |  0  0  7  8  9 |
-    *    | 10  0  0  0  0 |
-    *    |  0 11  0  0  0 |
-    *    \  0  0  0 12  0 /
+    *    | 10  1  1  0  0 |
+    *    |  0 11  1  1  0 |
+    *    \  0  0  1 12  1 /
     */
 
    const IndexType rows = 6;
@@ -675,9 +675,9 @@ void test_AddElement()
       { 0, 0,  1 }, { 0, 1,  2 }, { 0, 2, 3 },
                     { 1, 1,  4 }, { 1, 2, 5 }, { 1, 3,  6 },
                                   { 2, 2, 7 }, { 2, 3,  8 }, { 2, 4, 9 },
-      { 3, 0, 10 }, { 3, 1,  0 }, { 3, 2, 0 },
-                    { 4, 1, 11 }, { 4, 2, 0 }, { 4, 3,  0 },
-                                  { 5, 2, 0 }, { 5, 3, 12 }, { 5, 4, 0 } } );
+      { 3, 0, 10 }, { 3, 1,  1 }, { 3, 2, 1 },
+                    { 4, 1, 11 }, { 4, 2, 1 }, { 4, 3,  1 },
+                                  { 5, 2, 1 }, { 5, 3, 12 }, { 5, 4, 1 } } );
    /*typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 );
    m.setCompressedRowLengths( rowLengths );
 
@@ -718,22 +718,22 @@ void test_AddElement()
    EXPECT_EQ( m.getElement( 2, 4 ),  9 );
 
    EXPECT_EQ( m.getElement( 3, 0 ), 10 );
-   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  1 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  1 );
    EXPECT_EQ( m.getElement( 3, 3 ),  0 );
    EXPECT_EQ( m.getElement( 3, 4 ),  0 );
 
    EXPECT_EQ( m.getElement( 4, 0 ),  0 );
    EXPECT_EQ( m.getElement( 4, 1 ), 11 );
-   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  1 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  1 );
    EXPECT_EQ( m.getElement( 4, 4 ),  0 );
 
    EXPECT_EQ( m.getElement( 5, 0 ),  0 );
    EXPECT_EQ( m.getElement( 5, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  1 );
    EXPECT_EQ( m.getElement( 5, 3 ), 12 );
-   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  1 );
 
    // Add new elements to the old elements with a multiplying factor applied to the old elements.
    /*
@@ -742,9 +742,9 @@ void test_AddElement()
     *    /  3  6  9  0  0 \
     *    |  0 12 15 18  0 |
     *    |  0  0 21 24 27 |
-    *    | 30 11 12  0  0 |
-    *    |  0 35 14 15  0 |
-    *    \  0  0 16 41 18 /
+    *    | 30 13 14  0  0 |
+    *    |  0 35 16 17  0 |
+    *    \  0  0 18 41 20 /
     */
 
    RealType newValue = 1;
@@ -786,22 +786,22 @@ void test_AddElement()
    EXPECT_EQ( m.getElement( 2, 4 ), 27 );
 
    EXPECT_EQ( m.getElement( 3, 0 ), 30 );
-   EXPECT_EQ( m.getElement( 3, 1 ), 11 );
-   EXPECT_EQ( m.getElement( 3, 2 ), 12 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 13 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 14 );
    EXPECT_EQ( m.getElement( 3, 3 ),  0 );
    EXPECT_EQ( m.getElement( 3, 4 ),  0 );
 
    EXPECT_EQ( m.getElement( 4, 0 ),  0 );
    EXPECT_EQ( m.getElement( 4, 1 ), 35 );
-   EXPECT_EQ( m.getElement( 4, 2 ), 14 );
-   EXPECT_EQ( m.getElement( 4, 3 ), 15 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 16 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 17 );
    EXPECT_EQ( m.getElement( 4, 4 ),  0 );
 
    EXPECT_EQ( m.getElement( 5, 0 ),  0 );
    EXPECT_EQ( m.getElement( 5, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 2 ), 16 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 18 );
    EXPECT_EQ( m.getElement( 5, 3 ), 41 );
-   EXPECT_EQ( m.getElement( 5, 4 ), 18 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 20 );
 }
 
 template< typename Matrix >
-- 
GitLab


From cda43f31c15455833c81aa35289472f93ea50655 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 26 Feb 2020 17:08:02 +0100
Subject: [PATCH 151/179] Added atomic operations.

---
 src/TNL/Algorithms/AtomicOperations.h | 70 +++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 src/TNL/Algorithms/AtomicOperations.h

diff --git a/src/TNL/Algorithms/AtomicOperations.h b/src/TNL/Algorithms/AtomicOperations.h
new file mode 100644
index 000000000..b00260846
--- /dev/null
+++ b/src/TNL/Algorithms/AtomicOperations.h
@@ -0,0 +1,70 @@
+/***************************************************************************
+                          AtomicOperations.h  -  description
+                             -------------------
+    begin                : Feb 26, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Tomas Oberhuber, Jakub Klinkovsky
+
+#pragma once
+
+#include <TNL/Devices/Sequential.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+namespace TNL {
+namespace Algorithms {
+
+template< typename Device >
+struct AtomicOperations{};
+
+template<>
+struct AtomicOperations< Devices::Host >
+{
+   template< typename Value >
+   static void add( Value& v, const Value& a )
+   {
+#pragma omp atomic update
+      v += a;
+   }
+};
+
+template<>
+struct AtomicOperations< Devices::Cuda >
+{
+   template< typename Value >
+   __cuda_callable__
+   static void add( Value& v, const Value& a )
+   {
+#ifdef HAVE_CUDA
+#if __CUDA_ARCH__ < 600
+      if( std::is_same< Value, double >::value )
+      {
+         unsigned long long int* v_as_ull = ( unsigned long long int* ) &v;
+         unsigned long long int old = *v_as_ull, assumed;
+
+         do
+         {
+            assumed = old;
+            old = atomicCAS( v_as_ull,
+                             assumed,
+                             __double_as_longlong( s + __longlong_as_double( assumed ) ) ) ;
+
+         // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
+         }
+         while( assumed != old );
+         return;
+      }
+#endif
+      atomicAdd( &v, a );
+#endif
+   }
+
+};
+
+} //namespace Algorithms
+} //namespace TNL
\ No newline at end of file
-- 
GitLab


From a72c076d3455730dcbe6c1950e739458d5a1d57a Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 26 Feb 2020 17:08:30 +0100
Subject: [PATCH 152/179] Fixed symmetric sparse matrix with unit tests.

---
 src/TNL/Matrices/SparseMatrixView.hpp         | 36 +++++++++----
 .../Matrices/SymmetricSparseMatrixTest.hpp    | 53 +++++++++----------
 2 files changed, 51 insertions(+), 38 deletions(-)

diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index 4e5244806..e07e00fa6 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -13,7 +13,7 @@
 #include <functional>
 #include <TNL/Matrices/SparseMatrixView.h>
 #include <TNL/Algorithms/Reduction.h>
-#include <TNL/Atomic.h>
+#include <TNL/Algorithms/AtomicOperations.h>
 
 namespace TNL {
 namespace Matrices {
@@ -382,16 +382,24 @@ vectorProduct( const InVector& inVector,
    const IndexType paddingIndex = this->getPaddingIndex();
    if( isSymmetric() )
       outVector *= outVectorMultiplicator;
-   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> RealType {
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> RealType {
       const IndexType column = columnIndexesView[ globalIdx ];
       compute = ( column != paddingIndex );
       if( ! compute )
          return 0.0;
-      if( isSymmetric() )
+      if( isSymmetric() && column < row )
       {
-         TNL_ASSERT_TRUE( false, "" );
-         //Atomic< RealType, DeviceType > atomic;
-         //if( isBinary() )
+         if( isBinary() )
+            Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * inVectorView[ row ] );
+         else
+         {
+            //std::cerr << outVectorView << std::endl;
+            Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ] );
+            //outVectorView[ column ] += matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ];
+
+            //std::cerr << "Symmetric add to out vector row " << column << " value " << valuesView[ globalIdx ] << " * " << inVectorView[ row ] <<
+            //   " --> " << outVectorView[ column ] << std::endl;
+         }
       }
       if( isBinary() )
          return inVectorView[ column ];
@@ -401,10 +409,20 @@ vectorProduct( const InVector& inVector,
       sum += value;
    };
    auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
-      if( outVectorMultiplicator == 0.0 )
-         outVectorView[ row ] = matrixMultiplicator * value;
+      if( isSymmetric() )
+      {
+         //std::cerr << outVectorView << std::endl;
+         //std::cerr << "Adding " << matrixMultiplicator * value << " to result vector " << outVectorView[ row ];
+         outVectorView[ row ] += matrixMultiplicator * value;
+         //std::cerr << " ---> " << outVectorView[ row ] << std::endl;
+      }
       else
-         outVectorView[ row ] = outVectorMultiplicator * outVectorView[ row ] + matrixMultiplicator * value;
+      {
+         if( outVectorMultiplicator == 0.0 )
+            outVectorView[ row ] = matrixMultiplicator * value;
+         else
+            outVectorView[ row ] = outVectorMultiplicator * outVectorView[ row ] + matrixMultiplicator * value;
+      }
    };
    this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
 
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
index 193c1e031..58a4f4fce 100644
--- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
@@ -12,6 +12,7 @@
 #include <TNL/Containers/VectorView.h>
 #include <TNL/Math.h>
 #include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Algorithms/AtomicOperations.h>
 #include <iostream>
 #include <sstream>
 
@@ -734,10 +735,10 @@ void test_VectorProduct()
    const IndexType m_cols_2 = 4;
 
    Matrix m_2( m_rows_2, m_cols_2, {
-      { 0, 0, 1 }, { 0, 1, 2 }, { 0, 2, 3 },
-      { 1, 0, 2 },              { 1, 2, 6 }, { 1, 3, 8 },
+      { 0, 0, 1 },
+      { 1, 0, 2 },
       { 2, 0, 3 }, { 2, 1, 6 }, { 2, 2, 7 },
-                   { 3, 2, 8 },              { 3, 3, 9 } } );
+                   { 3, 1, 8 },              { 3, 3, 9 } } );
 
    VectorType inVector_2( m_cols_2, 2 );
    VectorType outVector_2( m_rows_2, 0 );
@@ -835,9 +836,9 @@ void test_VectorProduct()
    Matrix m_5( m_rows_5, m_cols_5,{
       { 0, 0, 1 },
                    { 1, 1, 2, },
-                                 { 2, 2, 3 }, { 2, 3,  4 }, { 2, 4,  6 }, { 2, 5,  9 },
-                                 { 3, 2, 4 }, { 3, 3,  5 }, { 3, 4,  7 }, { 3, 5, 10 },
-                                 { 4, 2, 6 }, { 4, 3,  7 }, { 4, 4,  8 }, { 4, 5, 11 },
+                                 { 2, 2, 3 },
+                                 { 3, 2, 4 }, { 3, 3,  5 },
+                                 { 4, 2, 6 }, { 4, 3,  7 }, { 4, 4,  8 },
                                  { 5, 2, 9 }, { 5, 3, 10 }, { 5, 4, 11 }, { 5, 5, 12 },
                                                                                         { 6, 6, 13 },
                                                                                                       { 7, 7, 14 }
@@ -883,9 +884,9 @@ void test_RowsReduction()
    Matrix m_5( m_rows_5, m_cols_5,{
       { 0, 0, 1 },
                    { 1, 1, 2, },
-                                 { 2, 2, 3 }, { 2, 3,  4 }, { 2, 4,  6 }, { 2, 5,  9 },
-                                 { 3, 2, 4 }, { 3, 3,  5 }, { 3, 4,  7 }, { 3, 5, 10 },
-                                 { 4, 2, 6 }, { 4, 3,  7 }, { 4, 4,  8 }, { 4, 5, 11 },
+                                 { 2, 2, 3 },
+                                 { 3, 2, 4 }, { 3, 3,  5 },
+                                 { 4, 2, 6 }, { 4, 3,  7 }, { 4, 4,  8 },
                                  { 5, 2, 9 }, { 5, 3, 10 }, { 5, 4, 11 }, { 5, 5, 12 },
                                                                                         { 6, 6, 13 },
                                                                                                       { 7, 7, 14 }
@@ -896,24 +897,28 @@ void test_RowsReduction()
    typename Matrix::RowsCapacitiesType rowLengths( m_rows_5 );
    typename Matrix::RowsCapacitiesType rowLengths_true( { 1, 1, 4, 4, 4, 4, 1, 1 } );
    auto rowLengths_view = rowLengths.getView();
-   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+   rowLengths_view = 0;
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) mutable -> IndexType {
+      if( value != 0.0 && row != column)
+         TNL::Algorithms::AtomicOperations< DeviceType >::add( rowLengths_view[ column ], ( IndexType ) 1 );
       return ( value != 0.0 );
    };
    auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
       aux += a;
    };
    auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
-      rowLengths_view[ rowIdx ] = value;
+      rowLengths_view[ rowIdx ] += value;
    };
    m_5.allRowsReduction( fetch, reduce, keep, 0 );
 
    EXPECT_EQ( rowLengths_true, rowLengths );
    m_5.getCompressedRowLengths( rowLengths );
-   EXPECT_EQ( rowLengths_true, rowLengths );
+   typename Matrix::RowsCapacitiesType rowLengths_symmetric( { 1, 1, 1, 2, 3, 4, 1, 1 } );
+   EXPECT_EQ( rowLengths_symmetric, rowLengths );
 
    ////
    // Compute max norm
-   TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( m_5.getRows() );
+   /*TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( m_5.getRows() );
    auto rowSums_view = rowSums.getView();
    auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
       return abs( value );
@@ -926,7 +931,7 @@ void test_RowsReduction()
    };
    m_5.allRowsReduction( max_fetch, max_reduce, max_keep, 0 );
    const RealType maxNorm = TNL::max( rowSums );
-   EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36
+   EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36*/
 }
 
 template< typename Matrix >
@@ -1018,7 +1023,7 @@ void test_SaveAndLoad( const char* filename )
                    { 2, 1, 4 }, { 2, 2, 5 },
                                 { 3, 2, 6 }, { 3, 3, 7 },
                                              { 4, 3, 8 }, { 4, 4,  9 },
-                                                          { 5, 5, 10 } } );
+                                                          { 5, 4, 10 } } );
 
    // Check the set elements
    EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
@@ -1060,9 +1065,6 @@ void test_SaveAndLoad( const char* filename )
    ASSERT_NO_THROW( savedMatrix.save( filename ) );
 
    Matrix loadedMatrix;
-   //typename Matrix::CompressedRowLengthsVector rowLengths2( m_rows );
-   //rowLengths2 = 3;
-   //loadedMatrix.setCompressedRowLengths( rowLengths2 );
 
    ASSERT_NO_THROW( loadedMatrix.load( filename ) );
 
@@ -1071,42 +1073,36 @@ void test_SaveAndLoad( const char* filename )
    EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
    EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
    EXPECT_EQ( savedMatrix.getElement( 0, 4 ), loadedMatrix.getElement( 0, 4 ) );
-   EXPECT_EQ( savedMatrix.getElement( 0, 5 ), loadedMatrix.getElement( 0, 5 ) );
 
    EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
    EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
    EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
    EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
    EXPECT_EQ( savedMatrix.getElement( 1, 4 ), loadedMatrix.getElement( 1, 4 ) );
-   EXPECT_EQ( savedMatrix.getElement( 1, 5 ), loadedMatrix.getElement( 1, 5 ) );
 
    EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
    EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
    EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
    EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
    EXPECT_EQ( savedMatrix.getElement( 2, 4 ), loadedMatrix.getElement( 2, 4 ) );
-   EXPECT_EQ( savedMatrix.getElement( 2, 5 ), loadedMatrix.getElement( 2, 5 ) );
 
    EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
    EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
    EXPECT_EQ( savedMatrix.getElement( 3, 4 ), loadedMatrix.getElement( 3, 4 ) );
-   EXPECT_EQ( savedMatrix.getElement( 3, 5 ), loadedMatrix.getElement( 3, 5 ) );
 
    EXPECT_EQ( savedMatrix.getElement( 4, 0 ), loadedMatrix.getElement( 4, 0 ) );
    EXPECT_EQ( savedMatrix.getElement( 4, 1 ), loadedMatrix.getElement( 4, 1 ) );
    EXPECT_EQ( savedMatrix.getElement( 4, 2 ), loadedMatrix.getElement( 4, 2 ) );
    EXPECT_EQ( savedMatrix.getElement( 4, 3 ), loadedMatrix.getElement( 4, 3 ) );
    EXPECT_EQ( savedMatrix.getElement( 4, 4 ), loadedMatrix.getElement( 4, 4 ) );
-   EXPECT_EQ( savedMatrix.getElement( 4, 5 ), loadedMatrix.getElement( 4, 5 ) );
 
    EXPECT_EQ( savedMatrix.getElement( 5, 0 ), loadedMatrix.getElement( 5, 0 ) );
    EXPECT_EQ( savedMatrix.getElement( 5, 1 ), loadedMatrix.getElement( 5, 1 ) );
    EXPECT_EQ( savedMatrix.getElement( 5, 2 ), loadedMatrix.getElement( 5, 2 ) );
    EXPECT_EQ( savedMatrix.getElement( 5, 3 ), loadedMatrix.getElement( 5, 3 ) );
    EXPECT_EQ( savedMatrix.getElement( 5, 4 ), loadedMatrix.getElement( 5, 4 ) );
-   EXPECT_EQ( savedMatrix.getElement( 5, 5 ), loadedMatrix.getElement( 5, 5 ) );
    EXPECT_EQ( std::remove( filename ), 0 );
 }
 
@@ -1130,11 +1126,10 @@ void test_Print()
    const IndexType m_cols = 4;
 
    Matrix m( m_rows, m_cols, {
-      { 0, 0, 4 }, { 0, 1, 1 },
-      { 1, 0, 1 }, { 1, 1, 4 }, { 1, 2, 1 },
-                   { 2, 1, 1 }, { 2, 2, 4 }, { 2, 3, 1 },
-                                { 3, 2, 1 }, { 3, 3, 4 }, { 3, 4, 1 },
-                                             { 4, 3, 1 }, { 4, 4, 4 }
+      { 0, 0, 4 },
+      { 1, 0, 1 }, { 1, 1, 4 },
+                   { 2, 1, 1 }, { 2, 2, 4 },
+                                { 3, 2, 1 }, { 3, 3, 4 }
    } );
 
    std::stringstream printed;
-- 
GitLab


From a4885bf91357a0559d4661b079241b1499572667 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 26 Feb 2020 22:19:10 +0100
Subject: [PATCH 153/179] Fixed symmetric sparse matrix to run with CUDA.

---
 src/TNL/Algorithms/AtomicOperations.h         | 59 +++++++++++++------
 src/TNL/Matrices/SparseMatrix.h               |  5 ++
 src/TNL/Matrices/SparseMatrixView.hpp         | 12 ----
 .../Matrices/SymmetricSparseMatrixTest_CSR.h  | 20 +++----
 4 files changed, 56 insertions(+), 40 deletions(-)

diff --git a/src/TNL/Algorithms/AtomicOperations.h b/src/TNL/Algorithms/AtomicOperations.h
index b00260846..4be725d48 100644
--- a/src/TNL/Algorithms/AtomicOperations.h
+++ b/src/TNL/Algorithms/AtomicOperations.h
@@ -12,6 +12,7 @@
 
 #pragma once
 
+#include <cuda.h>
 #include <TNL/Devices/Sequential.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
@@ -41,30 +42,52 @@ struct AtomicOperations< Devices::Cuda >
    static void add( Value& v, const Value& a )
    {
 #ifdef HAVE_CUDA
+      atomicAdd( &v, a );
+#endif // HAVE_CUDA
+   }
+
+#ifdef HAVE_CUDA
+   __device__
+   static void add( double& v, const double& a )
+   {
 #if __CUDA_ARCH__ < 600
-      if( std::is_same< Value, double >::value )
-      {
-         unsigned long long int* v_as_ull = ( unsigned long long int* ) &v;
-         unsigned long long int old = *v_as_ull, assumed;
+      unsigned long long int* v_as_ull = ( unsigned long long int* ) &v;
+      unsigned long long int old = *v_as_ull, assumed;
 
-         do
-         {
-            assumed = old;
-            old = atomicCAS( v_as_ull,
-                             assumed,
-                             __double_as_longlong( s + __longlong_as_double( assumed ) ) ) ;
+      do
+      {
+         assumed = old;
+         old = atomicCAS( v_as_ull,
+                          assumed,
+                          __double_as_longlong( a + __longlong_as_double( assumed ) ) ) ;
 
-         // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
-         }
-         while( assumed != old );
-         return;
+      // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
       }
-#endif
+      while( assumed != old );
+#else // __CUDA_ARCH__ < 600
       atomicAdd( &v, a );
-#endif
+#endif //__CUDA_ARCH__ < 600
+
+#else // HAVE_CUDA
+   static void add( double& v, const double& a ){}
+#endif // HAVE_CUDA
    }
 
+   __cuda_callable__
+   static void add( long int& v, const long int& a )
+   {
+#ifdef HAVE_CUDA
+      TNL_ASSERT_TRUE( false, "Atomic add for long int is not supported on CUDA." );
+#endif // HAVE_CUDA
+   }
+   
+   __cuda_callable__
+   static void add( short int& v, const short int& a )
+   {
+#ifdef HAVE_CUDA
+      TNL_ASSERT_TRUE( false, "Atomic add for short int is not supported on CUDA." );
+#endif // HAVE_CUDA
+   }
 };
-
 } //namespace Algorithms
-} //namespace TNL
\ No newline at end of file
+} //namespace TNL
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index c7f953a8c..d48e7d6ea 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -34,6 +34,11 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
       static constexpr bool isBinary() { return MatrixType::isBinary(); };
 
+      static_assert( ! isSymmetric() ||
+               ! std::is_same< Device, Devices::Cuda >::value ||
+               ( ( std::is_same< Real, float >::value || std::is_same< Real, double >::value || std::is_same< Real, int >::value || std::is_same< Real, long long int >::value ),
+              "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." ) );
+
       using RealType = Real;
       template< typename Device_, typename Index_, typename IndexAllocator_ >
       using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >;
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index e07e00fa6..98285e064 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -392,14 +392,7 @@ vectorProduct( const InVector& inVector,
          if( isBinary() )
             Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * inVectorView[ row ] );
          else
-         {
-            //std::cerr << outVectorView << std::endl;
             Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ] );
-            //outVectorView[ column ] += matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ];
-
-            //std::cerr << "Symmetric add to out vector row " << column << " value " << valuesView[ globalIdx ] << " * " << inVectorView[ row ] <<
-            //   " --> " << outVectorView[ column ] << std::endl;
-         }
       }
       if( isBinary() )
          return inVectorView[ column ];
@@ -410,12 +403,7 @@ vectorProduct( const InVector& inVector,
    };
    auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
       if( isSymmetric() )
-      {
-         //std::cerr << outVectorView << std::endl;
-         //std::cerr << "Adding " << matrixMultiplicator * value << " to result vector " << outVectorView[ row ];
          outVectorView[ row ] += matrixMultiplicator * value;
-         //std::cerr << " ---> " << outVectorView[ row ] << std::endl;
-      }
       else
       {
          if( outVectorMultiplicator == 0.0 )
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h
index 45dd5e5b9..f6f7ec95a 100644
--- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h
@@ -36,19 +36,19 @@ using MatrixTypes = ::testing::Types
     TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
-#ifdef HAVE_CUDA
-   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+#ifdef HAVE_CUDA // Commented types are not supported by atomic operations on GPU.
+   ,//TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
+    //TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
 #endif // HAVE_CUDA
 >;
 
-- 
GitLab


From 57aa3118aa5c915213d4665c93a37e83e33415f2 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Thu, 27 Feb 2020 10:02:32 +0100
Subject: [PATCH 154/179] Added --with-ci-flags option to build scripts.

---
 .gitlab-ci.yml | 3 ++-
 CMakeLists.txt | 5 +++++
 build          | 4 ++++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 624a19729..fa4c3725d 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -46,7 +46,7 @@ stages:
         - export CTEST_OUTPUT_ON_FAILURE=1
         - export CTEST_PARALLEL_LEVEL=4
         # enforce (more or less) warning-free builds
-        - export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla"
+        #- export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla"
         - mkdir -p "./builddir/$CI_JOB_NAME"
         - pushd "./builddir/$CI_JOB_NAME"
         - cmake ../..
@@ -64,6 +64,7 @@ stages:
                 -DWITH_EXAMPLES=${WITH_EXAMPLES}
                 -DWITH_TOOLS=${WITH_TOOLS}
                 -DWITH_PYTHON=${WITH_PYTHON}
+                -DWITH_CI_FLAGS=yes
         # "install" implies the "all" target
         - ninja ${NINJAFLAGS} install
         - if [[ ${WITH_TESTS} == "yes" ]]; then
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7d1666163..a312b00cb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -96,6 +96,11 @@ set( CMAKE_SHARED_LINKER_FLAGS "" )
 set( CMAKE_SHARED_LINKER_FLAGS_DEBUG "-rdynamic" )
 set( CMAKE_SHARED_LINKER_FLAGS_RELEASE "" )
 
+if( ${WITH_CI_FLAGS} )
+   # enforce (more or less) warning-free builds
+   set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla" )
+endif()
+
 # set additional Debug/Release options using generator expressions
 # (that way we can exclude some options for specific targets, see https://stackoverflow.com/a/59734798 for details)
 add_compile_options(
diff --git a/build b/build
index 9deb12d10..5a26cbb61 100755
--- a/build
+++ b/build
@@ -31,6 +31,7 @@ WITH_EXAMPLES="yes"
 WITH_PYTHON="yes"
 WITH_TOOLS="yes"
 WITH_BENCHMARKS="yes"
+WITH_CI_FLAGS="no"
 
 for option in "$@"
 do
@@ -63,6 +64,7 @@ do
         --with-benchmarks=*              ) WITH_BENCHMARKS="${option#*=}" ;;
         --with-python=*                  ) WITH_PYTHON="${option#*=}" ;;
         --with-cxx-flags=*               ) WITH_CXX_FLAGS="${option#*=}" ;;
+        --with-ci-flags=*                ) WITH_CI_FLAGS="${option#*=}" ;;
         *                                )
            echo "Unknown option ${option}. Use --help for more information."
            exit 1 ;;
@@ -93,6 +95,7 @@ if [[ ${HELP} == "yes" ]]; then
     echo "   --with-python=yes/no                  Compile the Python bindings. 'yes' by default."
     echo "   --with-benchmarks=yes/no              Compile the 'src/Benchmarks' directory. 'yes' by default."
     echo "   --with-cxx-flags=FLAGS                Additional flags for C++ compiler."
+    echo "   --with-cxx-flags=yes/no               Turns on more strict C++ flags for CI. 'no' by default."
     echo "   --cmake=CMAKE                         Path to cmake. 'cmake' by default."
     echo "   --verbose                             It enables verbose build."
     echo "   --root-dir=PATH                       Path to the TNL source code root dir."
@@ -145,6 +148,7 @@ cmake_command=(
          -DWITH_PYTHON=${WITH_PYTHON}
          -DWITH_BENCHMARKS=${WITH_BENCHMARKS}
          -DWITH_CXX_FLAGS=${WITH_CXX_FLAGS}
+         -DWITH_CI_FLAGS=${WITH_CI_FLAGS}
          -DDCMTK_DIR=${DCMTK_DIR}
 )
 
-- 
GitLab


From 135698134ecafff7846e342bf75202c434e54595 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Thu, 27 Feb 2020 10:56:12 +0100
Subject: [PATCH 155/179] Deleted unused variable in SparseMatrixView.

---
 src/TNL/Matrices/SparseMatrixView.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index 98285e064..e0f1e5e0a 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -209,7 +209,6 @@ SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
 getRow( const IndexType& rowIdx ) -> RowView
 {
    TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." );
-   typename RowView::SegmentViewType t = this->segments.getSegmentView( rowIdx );
    return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() );
 }
 
-- 
GitLab


From ab0c1c08db015d2334ec794db3a41c84d3fc9cae Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Thu, 27 Feb 2020 10:59:25 +0100
Subject: [PATCH 156/179] Added TNL::is_same which works in CUDA device code.

---
 src/TNL/Assert.h                 | 21 ++++++++++++++++++++-
 src/TNL/Containers/Array.hpp     |  8 ++++----
 src/TNL/Containers/ArrayView.hpp |  8 ++++----
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/src/TNL/Assert.h b/src/TNL/Assert.h
index dc4ba7cf4..1d3aa88fe 100644
--- a/src/TNL/Assert.h
+++ b/src/TNL/Assert.h
@@ -124,6 +124,25 @@
 #include <TNL/Debugging/StackBacktrace.h>
 
 namespace TNL {
+
+   // This is alternative implementation of is_same because std::is_same
+   // does not work in CUDA device code ("std::integral_constant<bool, (bool)0> ::value").
+   // This can be removed when std::_is_same works well.
+   //
+   template< typename T1, typename T2 >
+   struct is_same
+   {
+      __cuda_callable__
+      static constexpr bool value() { return false; }
+   };
+
+   template< typename T1 >
+   struct is_same< T1, T1 >
+   {
+      __cuda_callable__
+      static constexpr bool value() { return true; }
+   };
+
 /**
  * \brief Internal namespace for helper classes used in the TNL_ASSERT_* macros.
  */
@@ -394,7 +413,7 @@ TNL_IMPL_CMP_HELPER_( GT, > );
    pred( __TNL_JOIN_STRINGS( val1, op, val2 ), \
          msg, __FILE__, __TNL_PRETTY_FUNCTION, __LINE__, \
          #val1, #val2, val1, val2 )
-   
+
 // Main definitions of the TNL_ASSERT_* macros
 // unary
 #define TNL_ASSERT_TRUE( val, msg ) \
diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp
index 4dd8d5a2f..40b7d1b45 100644
--- a/src/TNL/Containers/Array.hpp
+++ b/src/TNL/Containers/Array.hpp
@@ -522,9 +522,9 @@ Array< Value, Device, Index, Allocator >::
 operator[]( const Index& i )
 {
 #ifdef __CUDA_ARCH__
-   TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+   TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Cuda >::value() ), "Attempt to access data not allocated on CUDA device from CUDA device." );
 #else
-   TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
+   TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Host >::value() || TNL::is_same< Device, Devices::Sequential >::value() ),
                "Attempt to access data not allocated on the host from the host." );
 #endif
    TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." );
@@ -542,9 +542,9 @@ Array< Value, Device, Index, Allocator >::
 operator[]( const Index& i ) const
 {
 #ifdef __CUDA_ARCH__
-   TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+   TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Cuda >::value() ), "Attempt to access data not allocated on CUDA device from CUDA device." );
 #else
-   TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
+   TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Host >::value() || TNL::is_same< Device, Devices::Sequential >::value() ),
                "Attempt to access data not allocated on the host from the host." );
 #endif
    TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." );
diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp
index 81e143ac2..838ebc32b 100644
--- a/src/TNL/Containers/ArrayView.hpp
+++ b/src/TNL/Containers/ArrayView.hpp
@@ -253,9 +253,9 @@ Value& ArrayView< Value, Device, Index >::
 operator[]( Index i )
 {
 #ifdef __CUDA_ARCH__
-   TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+   TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Cuda >::value() ), "Attempt to access data not allocated on CUDA device from CUDA device." );
 #else
-   TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
+   TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Host >::value() || TNL::is_same< Device, Devices::Sequential >::value() ),
                "Attempt to access data not allocated on the host from the host." );
 #endif
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
@@ -272,9 +272,9 @@ Value& ArrayView< Value, Device, Index >::
 operator[]( Index i ) const
 {
 #ifdef __CUDA_ARCH__
-   TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+   TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Cuda >::value() ), "Attempt to access data not allocated on CUDA device from CUDA device." );
 #else
-   TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ),
+   TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Host >::value() || TNL::is_same< Device, Devices::Sequential >::value() ),
                "Attempt to access data not allocated on the host from the host." );
 #endif
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
-- 
GitLab


From 26ed46e975764f38b86800e1329638dc53bc577e Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Thu, 27 Feb 2020 11:00:13 +0100
Subject: [PATCH 157/179] Deleting CI C++ flags which were moved to the root
 CMakeLists.

---
 .gitlab-ci.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index fa4c3725d..de46d4c82 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -45,8 +45,6 @@ stages:
           fi
         - export CTEST_OUTPUT_ON_FAILURE=1
         - export CTEST_PARALLEL_LEVEL=4
-        # enforce (more or less) warning-free builds
-        #- export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla"
         - mkdir -p "./builddir/$CI_JOB_NAME"
         - pushd "./builddir/$CI_JOB_NAME"
         - cmake ../..
-- 
GitLab


From 02354656efe4f6162a65375ec0e7181f2978e818 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Thu, 27 Feb 2020 11:46:29 +0100
Subject: [PATCH 158/179] Fixed include of cuda.h in AtomicOperations.

---
 src/TNL/Algorithms/AtomicOperations.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/TNL/Algorithms/AtomicOperations.h b/src/TNL/Algorithms/AtomicOperations.h
index 4be725d48..679b14eb5 100644
--- a/src/TNL/Algorithms/AtomicOperations.h
+++ b/src/TNL/Algorithms/AtomicOperations.h
@@ -12,7 +12,9 @@
 
 #pragma once
 
+#ifdef HAVE_CUDA
 #include <cuda.h>
+#endif
 #include <TNL/Devices/Sequential.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
@@ -80,7 +82,7 @@ struct AtomicOperations< Devices::Cuda >
       TNL_ASSERT_TRUE( false, "Atomic add for long int is not supported on CUDA." );
 #endif // HAVE_CUDA
    }
-   
+
    __cuda_callable__
    static void add( short int& v, const short int& a )
    {
-- 
GitLab


From fe5aca83de4ff47a768896a6dac35cea0ff48eb7 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Thu, 27 Feb 2020 12:23:18 +0100
Subject: [PATCH 159/179] Added MatrixInfo.

---
 src/TNL/Matrices/MatrixInfo.h | 76 +++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100644 src/TNL/Matrices/MatrixInfo.h

diff --git a/src/TNL/Matrices/MatrixInfo.h b/src/TNL/Matrices/MatrixInfo.h
new file mode 100644
index 000000000..34f85dd82
--- /dev/null
+++ b/src/TNL/Matrices/MatrixInfo.h
@@ -0,0 +1,76 @@
+/***************************************************************************
+                          Matrix.h  -  description
+                             -------------------
+    begin                : Dec 18, 2013
+    copyright            : (C) 2013 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/String.h>
+#include <TNL/Matrices/Dense.h>
+#include <TNL/Matrices/DenseView.h>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Matrices/SparseMatrixView.h>
+
+namespace TNL {
+/**
+ * \brief Namespace for matrix formats.
+ */
+namespace Matrices {
+
+template< typename Matrix >
+struct MatrixInfo
+{};
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+struct MatrixInfo< DenseView< Real, Device, RowMajorOrder > >
+{
+   static String getDensity() { return String( "dense" ); };
+};
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+struct MatrixInfo< Dense< Real, Device, RowMajorOrder, RealAllocator > >
+: public MatrixInfo< typename Dense< Real, Device, RowMajorOrder, RealAllocator >::ViewType >
+{
+};
+
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename Device_, typename Index_ > class SegmentsView >
+struct MatrixInfo< SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView > >
+{
+   static String getDensity() { return String( "sparse" ); };
+
+   static String getFormat() {
+      if( std::is_same< SegementsView ........ >)
+   };
+};
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename Device_, typename Index_, typename IndexAllocator_ > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+struct MatrixInfo< SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator > >
+:public MatrixInfo< typename SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::ViewType >
+{
+}
+
+} //namespace Matrices
+} //namespace TNL
\ No newline at end of file
-- 
GitLab


From 8f57fd182a486412b5acafd06a235245ac30c045 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 27 Feb 2020 14:02:56 +0100
Subject: [PATCH 160/179] Fixed syntax error in AtomicOperations.h

---
 src/TNL/Algorithms/AtomicOperations.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/TNL/Algorithms/AtomicOperations.h b/src/TNL/Algorithms/AtomicOperations.h
index 679b14eb5..6b5c5b4e0 100644
--- a/src/TNL/Algorithms/AtomicOperations.h
+++ b/src/TNL/Algorithms/AtomicOperations.h
@@ -69,11 +69,10 @@ struct AtomicOperations< Devices::Cuda >
 #else // __CUDA_ARCH__ < 600
       atomicAdd( &v, a );
 #endif //__CUDA_ARCH__ < 600
-
+   }
 #else // HAVE_CUDA
    static void add( double& v, const double& a ){}
 #endif // HAVE_CUDA
-   }
 
    __cuda_callable__
    static void add( long int& v, const long int& a )
-- 
GitLab


From b06136b9dfd99b592cfd88cdff659b966c54ee7a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 27 Feb 2020 16:32:37 +0100
Subject: [PATCH 161/179] Added use of MatrixInfo to SpMV benchmark.

---
 src/Benchmarks/SpMV/spmv.h                    |  6 +-
 src/TNL/Containers/Segments/CSR.h             |  2 +
 src/TNL/Containers/Segments/CSR.hpp           | 10 +++
 src/TNL/Containers/Segments/CSRView.h         |  2 +
 src/TNL/Containers/Segments/CSRView.hpp       |  9 +++
 src/TNL/Containers/Segments/Ellpack.h         |  3 +-
 src/TNL/Containers/Segments/Ellpack.hpp       | 12 ++++
 src/TNL/Containers/Segments/EllpackView.h     |  2 +
 src/TNL/Containers/Segments/EllpackView.hpp   | 11 +++
 src/TNL/Containers/Segments/SlicedEllpack.h   |  2 +
 src/TNL/Containers/Segments/SlicedEllpack.hpp | 12 ++++
 .../Containers/Segments/SlicedEllpackView.h   |  2 +
 .../Containers/Segments/SlicedEllpackView.hpp | 11 +++
 src/TNL/Matrices/MatrixInfo.h                 | 69 ++++++++++++++++---
 14 files changed, 139 insertions(+), 14 deletions(-)

diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index e3a1ae047..02a26854d 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -25,6 +25,7 @@
 #include <TNL/Matrices/Legacy/BiEllpack.h>
 
 #include <TNL/Matrices/MatrixReader.h>
+#include <TNL/Matrices/MatrixInfo.h>
 
 #include <TNL/Matrices/SparseMatrix.h>
 #include <TNL/Matrices/MatrixType.h>
@@ -160,7 +161,7 @@ benchmarkSpMV( Benchmark& benchmark,
           { "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) },
           { "rows", convertToString( hostMatrix.getRows() ) },
           { "columns", convertToString( hostMatrix.getColumns() ) },
-          { "matrix format", convertToString( getType( hostMatrix ) ) }
+          { "matrix format", MatrixInfo< HostMatrix >::getFormat() } //convertToString( getType( hostMatrix ) ) }
        } ));
 
     hostVector.setSize( hostMatrix.getColumns() );
@@ -294,7 +295,8 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
    
    benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR );
    benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, inputFileName, verboseMR );
-   //benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR );
+   benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR );
+   benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR );
 
    ////
    // Segments based sparse matrices
diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h
index 3645e9f6a..89cad0c6a 100644
--- a/src/TNL/Containers/Segments/CSR.h
+++ b/src/TNL/Containers/Segments/CSR.h
@@ -47,6 +47,8 @@ class CSR
 
       static String getSerializationType();
 
+      static String getSegmentsType();
+
       /**
        * \brief Set sizes of particular segments.
        */
diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index 55dcba74c..9a948b04e 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -64,6 +64,16 @@ getSerializationType()
    return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
 }
 
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+String
+CSR< Device, Index, IndexAllocator >::
+getSegmentsType()
+{
+   return ViewType::getSegmentsType();
+}
+
 template< typename Device,
           typename Index,
           typename IndexAllocator >
diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h
index 759fe8ff7..f7cf815d0 100644
--- a/src/TNL/Containers/Segments/CSRView.h
+++ b/src/TNL/Containers/Segments/CSRView.h
@@ -52,6 +52,8 @@ class CSRView
 
       static String getSerializationType();
 
+      static String getSegmentsType();
+
       __cuda_callable__
       ViewType getView();
 
diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
index 043e06e04..fab5c6da7 100644
--- a/src/TNL/Containers/Segments/CSRView.hpp
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -64,6 +64,15 @@ getSerializationType()
    return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
 }
 
+template< typename Device,
+          typename Index >
+String
+CSRView< Device, Index >::
+getSegmentsType()
+{
+   return "CSR";
+}
+
 template< typename Device,
           typename Index >
 __cuda_callable__
diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h
index 6edacb1cf..a1188a854 100644
--- a/src/TNL/Containers/Segments/Ellpack.h
+++ b/src/TNL/Containers/Segments/Ellpack.h
@@ -39,7 +39,6 @@ class Ellpack
       //using ConstViewType = EllpackView< Device, std::add_const_t< Index >, RowMajorOrder, Alignment >;
       using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
 
-
       Ellpack();
 
       Ellpack( const SegmentsSizes& sizes );
@@ -52,6 +51,8 @@ class Ellpack
 
       static String getSerializationType();
 
+      static String getSegmentsType();
+
       ViewType getView();
 
       //ConstViewType getConstView() const;
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index 663a65bc8..9c59c5529 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -88,6 +88,18 @@ getSerializationType()
    return "Ellpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
 }
 
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+String
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+getSegmentsType()
+{
+   return ViewType::getSegmentsType();
+}
+
 template< typename Device,
           typename Index,
           typename IndexAllocator,
diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h
index dcbc56d1b..10a89bd7b 100644
--- a/src/TNL/Containers/Segments/EllpackView.h
+++ b/src/TNL/Containers/Segments/EllpackView.h
@@ -54,6 +54,8 @@ class EllpackView
 
       static String getSerializationType();
 
+      static String getSegmentsType();
+
       __cuda_callable__
       ViewType getView();
 
diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp
index ea2dc0d21..84086f380 100644
--- a/src/TNL/Containers/Segments/EllpackView.hpp
+++ b/src/TNL/Containers/Segments/EllpackView.hpp
@@ -74,6 +74,17 @@ getSerializationType()
    return "Ellpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
 }
 
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+String
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getSegmentsType()
+{
+   return "Ellpack";
+}
+
 template< typename Device,
           typename Index,
           bool RowMajorOrder,
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h
index e1cdfa1d4..2027f1d78 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.h
+++ b/src/TNL/Containers/Segments/SlicedEllpack.h
@@ -49,6 +49,8 @@ class SlicedEllpack
 
       static String getSerializationType();
 
+      static String getSegmentsType();
+
       ViewType getView();
 
       ConstViewType getConstView() const;
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index 3d3a6d8c3..9ba1276e3 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -81,6 +81,18 @@ getSerializationType()
    return "SlicedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
 }
 
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+String
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getSegmentsType()
+{
+   return ViewType::getSegmentsType();
+}
+
 template< typename Device,
           typename Index,
           typename IndexAllocator,
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h
index 23001553c..6e2e55bbc 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.h
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.h
@@ -56,6 +56,8 @@ class SlicedEllpackView
 
       static String getSerializationType();
 
+      static String getSegmentsType();
+
       __cuda_callable__
       ViewType getView();
 
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
index 3e3c8c09c..f9e252fd9 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
@@ -85,6 +85,17 @@ getSerializationType()
    return "SlicedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
 }
 
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+String
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSegmentsType()
+{
+   return "SlicedEllpack";
+}
+
 template< typename Device,
           typename Index,
           bool RowMajorOrder,
diff --git a/src/TNL/Matrices/MatrixInfo.h b/src/TNL/Matrices/MatrixInfo.h
index 34f85dd82..75cac4055 100644
--- a/src/TNL/Matrices/MatrixInfo.h
+++ b/src/TNL/Matrices/MatrixInfo.h
@@ -12,9 +12,17 @@
 
 #include <TNL/String.h>
 #include <TNL/Matrices/Dense.h>
-#include <TNL/Matrices/DenseView.h>
+#include <TNL/Matrices/DenseMatrixView.h>
 #include <TNL/Matrices/SparseMatrix.h>
 #include <TNL/Matrices/SparseMatrixView.h>
+#include <TNL/Containers/Segments/CSRView.h>
+#include <TNL/Containers/Segments/EllpackView.h>
+#include <TNL/Containers/Segments/SlicedEllpackView.h>
+#include <TNL/Matrices/Legacy/CSR.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
+#include <TNL/Matrices/Legacy/ChunkedEllpack.h>
+#include <TNL/Matrices/Legacy/BiEllpack.h>
 
 namespace TNL {
 /**
@@ -30,7 +38,7 @@ template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder >
-struct MatrixInfo< DenseView< Real, Device, RowMajorOrder > >
+struct MatrixInfo< DenseMatrixView< Real, Device, Index, RowMajorOrder > >
 {
    static String getDensity() { return String( "dense" ); };
 };
@@ -40,12 +48,11 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-struct MatrixInfo< Dense< Real, Device, RowMajorOrder, RealAllocator > >
-: public MatrixInfo< typename Dense< Real, Device, RowMajorOrder, RealAllocator >::ViewType >
+struct MatrixInfo< Dense< Real, Device, Index, RowMajorOrder, RealAllocator > >
+: public MatrixInfo< typename Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::ViewType >
 {
 };
 
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -55,9 +62,7 @@ struct MatrixInfo< SparseMatrixView< Real, Device, Index, MatrixType, SegmentsVi
 {
    static String getDensity() { return String( "sparse" ); };
 
-   static String getFormat() {
-      if( std::is_same< SegementsView ........ >)
-   };
+   static String getFormat() { return SegmentsView< Device, Index >::getSegmentsType(); };
 };
 
 template< typename Real,
@@ -68,9 +73,51 @@ template< typename Real,
           typename RealAllocator,
           typename IndexAllocator >
 struct MatrixInfo< SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator > >
-:public MatrixInfo< typename SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::ViewType >
+: public MatrixInfo< typename SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::ViewType >
+{
+};
+
+/////
+// Legacy matrices
+template< typename Real, typename Device, typename Index >
+struct MatrixInfo< BiEllpack< Real, Device, Index > >
+{
+   static String getDensity() { return String( "sparse" ); };
+
+   static String getFormat() { return "BiEllpack Legacy"; };
+};
+
+template< typename Real, typename Device, typename Index >
+struct MatrixInfo< CSR< Real, Device, Index > >
+{
+   static String getDensity() { return String( "sparse" ); };
+
+   static String getFormat() { return "CSR Legacy"; };
+};
+
+template< typename Real, typename Device, typename Index >
+struct MatrixInfo< ChunkedEllpack< Real, Device, Index > >
 {
-}
+   static String getDensity() { return String( "sparse" ); };
+
+   static String getFormat() { return "ChunkedEllpack Legacy"; };
+};
+
+template< typename Real, typename Device, typename Index >
+struct MatrixInfo< Ellpack< Real, Device, Index > >
+{
+   static String getDensity() { return String( "sparse" ); };
+
+   static String getFormat() { return "Ellpack Legacy"; };
+};
+
+template< typename Real, typename Device, typename Index, int SliceSize >
+struct MatrixInfo< SlicedEllpack< Real, Device, Index, SliceSize> >
+{
+   static String getDensity() { return String( "sparse" ); };
+
+   static String getFormat() { return "SlicedEllpack Legacy"; };
+};
 
 } //namespace Matrices
-} //namespace TNL
\ No newline at end of file
+} //namespace TNL
-- 
GitLab


From 3cde4e81047d78453c5fa6463d43d7241e9eba42 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 27 Feb 2020 20:46:57 +0100
Subject: [PATCH 162/179] Fixed MatrixReader after rebase.

---
 src/TNL/Matrices/MatrixReader_impl.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h
index a80d00283..df2c05c63 100644
--- a/src/TNL/Matrices/MatrixReader_impl.h
+++ b/src/TNL/Matrices/MatrixReader_impl.h
@@ -60,12 +60,6 @@ void MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file,
    matrix.setDimensions( rows, columns );
    rowLengths.setSize( rows );
 
-   if( ! computeCompressedRowLengthsFromMtxFile( file, rowLengths, columns, rows, symmetricMatrix, verbose ) )
-   {
-      std::cerr << "Unable to compute compressed row lengths." << std::endl;
-      return false;
-   }
-
    computeCompressedRowLengthsFromMtxFile( file, rowLengths, columns, rows, symmetricMatrix, verbose );
 
    matrix.setCompressedRowLengths( rowLengths );
-- 
GitLab


From b69b69ab624a5bd0014d03b44f513541c60ec0d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Sun, 1 Mar 2020 07:50:07 +0100
Subject: [PATCH 163/179] Removed --with-cxx-flags from the build script

If necessary, custom flags can be specified by simply exporting the
CXXFLAGS environment variable in the shell.
---
 CMakeLists.txt | 2 +-
 build          | 4 ----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a312b00cb..ea0d8a30b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -82,7 +82,7 @@ set( CMAKE_CXX_STANDARD_REQUIRED ON )
 set( CMAKE_CXX_EXTENSIONS OFF )
 
 # set default build options
-set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WITH_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" )
+set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" )
 set( CMAKE_CXX_FLAGS_DEBUG "-g" )
 set( CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" )
 # pass -rdynamic only in Debug mode
diff --git a/build b/build
index 5a26cbb61..ee74fa87b 100755
--- a/build
+++ b/build
@@ -63,7 +63,6 @@ do
         --with-tools=*                   ) WITH_TOOLS="${option#*=}" ;;
         --with-benchmarks=*              ) WITH_BENCHMARKS="${option#*=}" ;;
         --with-python=*                  ) WITH_PYTHON="${option#*=}" ;;
-        --with-cxx-flags=*               ) WITH_CXX_FLAGS="${option#*=}" ;;
         --with-ci-flags=*                ) WITH_CI_FLAGS="${option#*=}" ;;
         *                                )
            echo "Unknown option ${option}. Use --help for more information."
@@ -94,8 +93,6 @@ if [[ ${HELP} == "yes" ]]; then
     echo "   --with-tools=yes/no                   Compile the 'src/Tools' directory. 'yes' by default."
     echo "   --with-python=yes/no                  Compile the Python bindings. 'yes' by default."
     echo "   --with-benchmarks=yes/no              Compile the 'src/Benchmarks' directory. 'yes' by default."
-    echo "   --with-cxx-flags=FLAGS                Additional flags for C++ compiler."
-    echo "   --with-cxx-flags=yes/no               Turns on more strict C++ flags for CI. 'no' by default."
     echo "   --cmake=CMAKE                         Path to cmake. 'cmake' by default."
     echo "   --verbose                             It enables verbose build."
     echo "   --root-dir=PATH                       Path to the TNL source code root dir."
@@ -147,7 +144,6 @@ cmake_command=(
          -DWITH_TOOLS=${WITH_TOOLS}
          -DWITH_PYTHON=${WITH_PYTHON}
          -DWITH_BENCHMARKS=${WITH_BENCHMARKS}
-         -DWITH_CXX_FLAGS=${WITH_CXX_FLAGS}
          -DWITH_CI_FLAGS=${WITH_CI_FLAGS}
          -DDCMTK_DIR=${DCMTK_DIR}
 )
-- 
GitLab


From 603b5edf83f3bb36b35d23d77e29b84b540243a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Sun, 1 Mar 2020 08:05:26 +0100
Subject: [PATCH 164/179] Removed useless include of SlicedEllpack from
 ODESolvers benchmark

---
 src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
index 1e4bc380e..dad2cdd8d 100644
--- a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
+++ b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
@@ -36,8 +36,6 @@
 #include "Euler.h"
 #include "Merson.h"
 
-#include <TNL/Matrices/Legacy/SlicedEllpack.h>
-
 using namespace TNL;
 using namespace TNL::Benchmarks;
 using namespace TNL::Pointers;
-- 
GitLab


From 2d93d5680f8ca256862a038277c74763afb81d1d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Sun, 1 Mar 2020 08:46:15 +0100
Subject: [PATCH 165/179] Removed TNL::is_same and tweaked asserts in Array and
 ArrayView

---
 src/TNL/Assert.h                 | 19 -------------------
 src/TNL/Containers/Array.h       |  4 ++--
 src/TNL/Containers/Array.hpp     | 12 +++++-------
 src/TNL/Containers/ArrayView.h   |  4 ++--
 src/TNL/Containers/ArrayView.hpp | 10 ++++------
 5 files changed, 13 insertions(+), 36 deletions(-)

diff --git a/src/TNL/Assert.h b/src/TNL/Assert.h
index 1d3aa88fe..630abd09f 100644
--- a/src/TNL/Assert.h
+++ b/src/TNL/Assert.h
@@ -124,25 +124,6 @@
 #include <TNL/Debugging/StackBacktrace.h>
 
 namespace TNL {
-
-   // This is alternative implementation of is_same because std::is_same
-   // does not work in CUDA device code ("std::integral_constant<bool, (bool)0> ::value").
-   // This can be removed when std::_is_same works well.
-   //
-   template< typename T1, typename T2 >
-   struct is_same
-   {
-      __cuda_callable__
-      static constexpr bool value() { return false; }
-   };
-
-   template< typename T1 >
-   struct is_same< T1, T1 >
-   {
-      __cuda_callable__
-      static constexpr bool value() { return true; }
-   };
-
 /**
  * \brief Internal namespace for helper classes used in the TNL_ASSERT_* macros.
  */
diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h
index bf69f4888..116624511 100644
--- a/src/TNL/Containers/Array.h
+++ b/src/TNL/Containers/Array.h
@@ -457,7 +457,7 @@ class Array
        * host, and if the array was allocated in the device memory, it can be
        * called only from device kernels. If NDEBUG is not defined, assertions
        * inside this methods performs runtime checks for cross-device memory
-       * accesses which lead to segmentation fault. If you need to do just a 
+       * accesses which lead to segmentation fault. If you need to do just a
        * pointer arithmetics use \e getData instead.
        *
        * \param i The index of the element to be accessed.
@@ -474,7 +474,7 @@ class Array
        * host, and if the array was allocated in the device memory, it can be
        * called only from device kernels. If NDEBUG is not defined, assertions
        * inside this methods performs runtime checks for cross-device memory
-       * accesses which lead to segmentation fault. If you need to do just a 
+       * accesses which lead to segmentation fault. If you need to do just a
        * pointer arithmetics use \e getData instead.
        *
        * \param i The index of the element to be accessed.
diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp
index 40b7d1b45..ab81db7aa 100644
--- a/src/TNL/Containers/Array.hpp
+++ b/src/TNL/Containers/Array.hpp
@@ -71,7 +71,7 @@ Array( const IndexType& size, const Value& value, const AllocatorType& allocator
 : allocator( allocator )
 {
    this->setSize( size );
-   ( *this ) = value;
+   *this = value;
 }
 
 template< typename Value,
@@ -522,10 +522,9 @@ Array< Value, Device, Index, Allocator >::
 operator[]( const Index& i )
 {
 #ifdef __CUDA_ARCH__
-   TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Cuda >::value() ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+   TNL_ASSERT_TRUE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on CUDA device from CUDA device." );
 #else
-   TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Host >::value() || TNL::is_same< Device, Devices::Sequential >::value() ),
-               "Attempt to access data not allocated on the host from the host." );
+   TNL_ASSERT_FALSE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on the host from the host." );
 #endif
    TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
@@ -542,10 +541,9 @@ Array< Value, Device, Index, Allocator >::
 operator[]( const Index& i ) const
 {
 #ifdef __CUDA_ARCH__
-   TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Cuda >::value() ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+   TNL_ASSERT_TRUE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on CUDA device from CUDA device." );
 #else
-   TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Host >::value() || TNL::is_same< Device, Devices::Sequential >::value() ),
-               "Attempt to access data not allocated on the host from the host." );
+   TNL_ASSERT_FALSE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on the host from the host." );
 #endif
    TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h
index af54aef8a..c29e00741 100644
--- a/src/TNL/Containers/ArrayView.h
+++ b/src/TNL/Containers/ArrayView.h
@@ -349,7 +349,7 @@ public:
     * host, and if the data was allocated in the device memory, it can be
     * called only from device kernels. If NDEBUG is not defined, assertions
     * inside this methods performs runtime checks for cross-device memory
-    * accesses which lead to segmentation fault. If you need to do just a 
+    * accesses which lead to segmentation fault. If you need to do just a
     * pointer arithmetics use \e getData instead.
     *
     * \param i The index of the element to be accessed.
@@ -367,7 +367,7 @@ public:
     * host, and if the data was allocated in the device memory, it can be
     * called only from device kernels. If NDEBUG is not defined, assertions
     * inside this methods performs runtime checks for cross-device memory
-    * accesses which lead to segmentation fault. If you need to do just a 
+    * accesses which lead to segmentation fault. If you need to do just a
     * pointer arithmetics use \e getData instead.
     *
     * \param i The index of the element to be accessed.
diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp
index 838ebc32b..0562b81db 100644
--- a/src/TNL/Containers/ArrayView.hpp
+++ b/src/TNL/Containers/ArrayView.hpp
@@ -253,10 +253,9 @@ Value& ArrayView< Value, Device, Index >::
 operator[]( Index i )
 {
 #ifdef __CUDA_ARCH__
-   TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Cuda >::value() ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+   TNL_ASSERT_TRUE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on CUDA device from CUDA device." );
 #else
-   TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Host >::value() || TNL::is_same< Device, Devices::Sequential >::value() ),
-               "Attempt to access data not allocated on the host from the host." );
+   TNL_ASSERT_FALSE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on the host from the host." );
 #endif
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
@@ -272,10 +271,9 @@ Value& ArrayView< Value, Device, Index >::
 operator[]( Index i ) const
 {
 #ifdef __CUDA_ARCH__
-   TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Cuda >::value() ), "Attempt to access data not allocated on CUDA device from CUDA device." );
+   TNL_ASSERT_TRUE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on CUDA device from CUDA device." );
 #else
-   TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Host >::value() || TNL::is_same< Device, Devices::Sequential >::value() ),
-               "Attempt to access data not allocated on the host from the host." );
+   TNL_ASSERT_FALSE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on the host from the host." );
 #endif
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
-- 
GitLab


From 79c1e84193c78387d100632a023e0e381167ebff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Sun, 1 Mar 2020 08:50:45 +0100
Subject: [PATCH 166/179] Removed method ArrayView::copy

Shallow copy is equivalent to binding, for which there is ArrayView::bind.
---
 src/TNL/Containers/ArrayView.h                    |  9 ---------
 src/TNL/Containers/ArrayView.hpp                  | 13 -------------
 src/TNL/Containers/Segments/CSRView.hpp           |  2 +-
 src/TNL/Containers/Segments/SlicedEllpackView.hpp |  4 ++--
 src/TNL/Matrices/MatrixView.hpp                   |  2 +-
 src/TNL/Matrices/MultidiagonalMatrixView.hpp      |  4 ++--
 src/TNL/Matrices/SparseMatrixView.hpp             |  2 +-
 7 files changed, 7 insertions(+), 29 deletions(-)

diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h
index c29e00741..5b9766ffd 100644
--- a/src/TNL/Containers/ArrayView.h
+++ b/src/TNL/Containers/ArrayView.h
@@ -237,15 +237,6 @@ public:
              typename = std::enable_if_t< std::is_convertible< T, ValueType >::value || IsArrayType< T >::value > >
    ArrayView& operator=( const T& array );
 
-   /**
-    * \brief Makes shallow copy of the array view.
-    * 
-    * \param view Reference to the source array view.
-    * \return Reference to this array view.
-    */
-   __cuda_callable__
-   ArrayView& copy( const ArrayView& view );
-
    /**
     * \brief Swaps this array view with another.
     *
diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp
index 0562b81db..e36182cd5 100644
--- a/src/TNL/Containers/ArrayView.hpp
+++ b/src/TNL/Containers/ArrayView.hpp
@@ -118,19 +118,6 @@ operator=( const T& data )
    return *this;
 }
 
-template< typename Value,
-           typename Device,
-           typename Index >
-__cuda_callable__
-ArrayView< Value, Device, Index >&
-ArrayView< Value, Device, Index >::
-copy( const ArrayView& view )
-{
-   data = view.data;
-   size = view.size;
-   return *this;
-}
-
 template< typename Value,
           typename Device,
           typename Index >
diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
index fab5c6da7..02be7f099 100644
--- a/src/TNL/Containers/Segments/CSRView.hpp
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -237,7 +237,7 @@ CSRView< Device, Index >&
 CSRView< Device, Index >::
 operator=( const CSRView& view )
 {
-   this->offsets.copy( view.offsets );
+   this->offsets.bind( view.offsets );
    return *this;
 }
 
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
index f9e252fd9..c4e03aada 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
@@ -368,8 +368,8 @@ operator=( const SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >& v
    this->size = view.size;
    this->alignedSize = view.alignedSize;
    this->segmentsCount = view.segmentsCount;
-   this->sliceOffsets.copy( view.sliceOffsets );
-   this->sliceSegmentSizes.copy( view.sliceSegmentSizes );
+   this->sliceOffsets.bind( view.sliceOffsets );
+   this->sliceSegmentSizes.bind( view.sliceSegmentSizes );
    return *this;
 }
 
diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp
index 363fec208..9fd73e519 100644
--- a/src/TNL/Matrices/MatrixView.hpp
+++ b/src/TNL/Matrices/MatrixView.hpp
@@ -138,7 +138,7 @@ operator=( const MatrixView& view )
 {
    rows = view.rows;
    columns = view.columns;
-   values.copy( view.values );
+   values.bind( view.values );
    return *this;
 }
 
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
index 2bd5392df..ecfe1c1d8 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
@@ -335,8 +335,8 @@ MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 operator=( const MultidiagonalMatrixView& view )
 {
    MatrixView< Real, Device, Index >::operator=( view );
-   this->diagonalsShifts.copy( view.diagonalsShifts );
-   this->hostDiagonalsShifts.copy( view.hostDiagonalsShifts );
+   this->diagonalsShifts.bind( view.diagonalsShifts );
+   this->hostDiagonalsShifts.bind( view.hostDiagonalsShifts );
    this->indexer = view.indexer;
    return *this;
 }
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index e0f1e5e0a..2bae61f98 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -606,7 +606,7 @@ SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
 operator=( const SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >& matrix )
 {
    MatrixView< Real, Device, Index >::operator=( matrix );
-   this->columnIndexes.copy( matrix.columnIndexes );
+   this->columnIndexes.bind( matrix.columnIndexes );
    this->segments = matrix.segments;
    return *this;
 }
-- 
GitLab


From ae8ee53ea2af54ecbb9f79f666da778a70c2dc68 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Sun, 1 Mar 2020 09:06:36 +0100
Subject: [PATCH 167/179] Fixed expression in static_assert in SparseMatrix.h

---
 src/TNL/Matrices/SparseMatrix.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index d48e7d6ea..032767518 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -34,10 +34,11 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
       static constexpr bool isBinary() { return MatrixType::isBinary(); };
 
-      static_assert( ! isSymmetric() ||
-               ! std::is_same< Device, Devices::Cuda >::value ||
-               ( ( std::is_same< Real, float >::value || std::is_same< Real, double >::value || std::is_same< Real, int >::value || std::is_same< Real, long long int >::value ),
-              "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." ) );
+      static_assert(
+            ! isSymmetric() ||
+            ! std::is_same< Device, Devices::Cuda >::value ||
+            ( std::is_same< Real, float >::value || std::is_same< Real, double >::value || std::is_same< Real, int >::value || std::is_same< Real, long long int >::value ),
+            "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." );
 
       using RealType = Real;
       template< typename Device_, typename Index_, typename IndexAllocator_ >
-- 
GitLab


From c22a969b8d9a4d7c30d82c13b86bfc4e987ce131 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Sun, 1 Mar 2020 09:30:23 +0100
Subject: [PATCH 168/179] Removed unnecessary/duplicate types from the
 instantiations of SegmentsTest

---
 src/UnitTests/Containers/Segments/SegmentsTest_CSR.h   | 10 +---------
 .../Containers/Segments/SegmentsTest_Ellpack.h         | 10 +---------
 .../Containers/Segments/SegmentsTest_SlicedEllpack.h   | 10 +---------
 3 files changed, 3 insertions(+), 27 deletions(-)

diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h
index 81d4e9ff3..f2a3a1863 100644
--- a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h
@@ -27,19 +27,11 @@ protected:
 // types for which MatrixTest is instantiated
 using CSRSegmentsTypes = ::testing::Types
 <
-    TNL::Containers::Segments::CSR< TNL::Devices::Host, int    >,
-    TNL::Containers::Segments::CSR< TNL::Devices::Host, long   >,
-    TNL::Containers::Segments::CSR< TNL::Devices::Host, int    >,
-    TNL::Containers::Segments::CSR< TNL::Devices::Host, long   >,
     TNL::Containers::Segments::CSR< TNL::Devices::Host, int    >,
     TNL::Containers::Segments::CSR< TNL::Devices::Host, long   >
 #ifdef HAVE_CUDA
    ,TNL::Containers::Segments::CSR< TNL::Devices::Cuda, int    >,
-    TNL::Containers::Segments::CSR< TNL::Devices::Cuda, long   >,
-    TNL::Containers::Segments::CSR< TNL::Devices::Cuda, int    >,
-    TNL::Containers::Segments::CSR< TNL::Devices::Cuda, long   >,
-    TNL::Containers::Segments::CSR< TNL::Devices::Cuda, int    >,
-    TNL::Containers::Segments::CSR< TNL::Devices::Cuda, long   >,
+    TNL::Containers::Segments::CSR< TNL::Devices::Cuda, long   >
 #endif
 >;
 
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h
index 7b5e90b23..7def8a732 100644
--- a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h
@@ -27,19 +27,11 @@ protected:
 // types for which MatrixTest is instantiated
 using EllpackSegmentsTypes = ::testing::Types
 <
-    TNL::Containers::Segments::Ellpack< TNL::Devices::Host, int    >,
-    TNL::Containers::Segments::Ellpack< TNL::Devices::Host, long   >,
-    TNL::Containers::Segments::Ellpack< TNL::Devices::Host, int    >,
-    TNL::Containers::Segments::Ellpack< TNL::Devices::Host, long   >,
     TNL::Containers::Segments::Ellpack< TNL::Devices::Host, int    >,
     TNL::Containers::Segments::Ellpack< TNL::Devices::Host, long   >
 #ifdef HAVE_CUDA
    ,TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, int    >,
-    TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, long   >,
-    TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, int    >,
-    TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, long   >,
-    TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, int    >,
-    TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, long   >,
+    TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, long   >
 #endif
 >;
 
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h
index 1bcff3191..51131c7df 100644
--- a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h
@@ -27,19 +27,11 @@ protected:
 // types for which MatrixTest is instantiated
 using SlicedEllpackSegmentsTypes = ::testing::Types
 <
-    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, int    >,
-    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, long   >,
-    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, int    >,
-    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, long   >,
     TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, int    >,
     TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, long   >
 #ifdef HAVE_CUDA
    ,TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, int    >,
-    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, long   >,
-    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, int    >,
-    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, long   >,
-    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, int    >,
-    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, long   >,
+    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, long   >
 #endif
 >;
 
-- 
GitLab


From dec4b38830fd9a5c0a2456f4c1c2ff7025a82175 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Sun, 1 Mar 2020 09:35:31 +0100
Subject: [PATCH 169/179] Removed tests for sparse matrices which use short as
 IndexType to speed up the compilation

---
 src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h    | 10 +---------
 .../Matrices/BinarySparseMatrixTest_Ellpack.h          | 10 +---------
 .../Matrices/BinarySparseMatrixTest_SlicedEllpack.h    | 10 +---------
 .../Matrices/Legacy/SparseMatrixTest_AdEllpack.h       | 10 +---------
 .../Matrices/Legacy/SparseMatrixTest_BiEllpack.h       | 10 +---------
 src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h   | 10 +---------
 .../Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h  | 10 +---------
 .../Matrices/Legacy/SparseMatrixTest_Ellpack.h         | 10 +---------
 .../Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h   | 10 +---------
 src/UnitTests/Matrices/SparseMatrixTest_CSR.h          | 10 +---------
 src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h      | 10 +---------
 .../Matrices/SparseMatrixTest_SlicedEllpack.h          | 10 +---------
 12 files changed, 12 insertions(+), 108 deletions(-)

diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h
index 9cd52741a..a853281be 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h
@@ -29,10 +29,6 @@ protected:
 // types for which MatrixTest is instantiated
 using CSRMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
@@ -42,11 +38,7 @@ using CSRMatrixTypes = ::testing::Types
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h
index 708bd85f0..3c0a65cfd 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h
@@ -40,10 +40,6 @@ using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, In
 // types for which MatrixTest is instantiated
 using EllpackMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
     TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
     TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
@@ -53,11 +49,7 @@ using EllpackMatrixTypes = ::testing::Types
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
     TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
     TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
     TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h
index 7ebc25968..98c5f65ae 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h
@@ -40,10 +40,6 @@ using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Devic
 // types for which MatrixTest is instantiated
 using SlicedEllpackMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
     TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
     TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
@@ -53,11 +49,7 @@ using SlicedEllpackMatrixTypes = ::testing::Types
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
     TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
     TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
     TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h
index d2d268dac..8e07205e5 100644
--- a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h
@@ -27,10 +27,6 @@ protected:
 // types for which MatrixTest is instantiated
 using AdEllpackMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::AdEllpack< int,    TNL::Devices::Host, short >,
-    TNL::Matrices::AdEllpack< long,   TNL::Devices::Host, short >,
-    TNL::Matrices::AdEllpack< float,  TNL::Devices::Host, short >,
-    TNL::Matrices::AdEllpack< double, TNL::Devices::Host, short >,
     TNL::Matrices::AdEllpack< int,    TNL::Devices::Host, int >,
     TNL::Matrices::AdEllpack< long,   TNL::Devices::Host, int >,
     TNL::Matrices::AdEllpack< float,  TNL::Devices::Host, int >,
@@ -40,11 +36,7 @@ using AdEllpackMatrixTypes = ::testing::Types
     TNL::Matrices::AdEllpack< float,  TNL::Devices::Host, long >,
     TNL::Matrices::AdEllpack< double, TNL::Devices::Host, long >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::AdEllpack< int,    TNL::Devices::Cuda, short >,
-    TNL::Matrices::AdEllpack< long,   TNL::Devices::Cuda, short >,
-    TNL::Matrices::AdEllpack< float,  TNL::Devices::Cuda, short >,
-    TNL::Matrices::AdEllpack< double, TNL::Devices::Cuda, short >,
-    TNL::Matrices::AdEllpack< int,    TNL::Devices::Cuda, int >,
+   ,TNL::Matrices::AdEllpack< int,    TNL::Devices::Cuda, int >,
     TNL::Matrices::AdEllpack< long,   TNL::Devices::Cuda, int >,
     TNL::Matrices::AdEllpack< float,  TNL::Devices::Cuda, int >,
     TNL::Matrices::AdEllpack< double, TNL::Devices::Cuda, int >,
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h
index 9dab63c1a..c38648107 100644
--- a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h
@@ -27,10 +27,6 @@ protected:
 // types for which MatrixTest is instantiated
 using BiEllpackMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::BiEllpack< int,    TNL::Devices::Host, short >,
-    TNL::Matrices::BiEllpack< long,   TNL::Devices::Host, short >,
-    TNL::Matrices::BiEllpack< float,  TNL::Devices::Host, short >,
-    TNL::Matrices::BiEllpack< double, TNL::Devices::Host, short >,
     TNL::Matrices::BiEllpack< int,    TNL::Devices::Host, int >,
     TNL::Matrices::BiEllpack< long,   TNL::Devices::Host, int >,
     TNL::Matrices::BiEllpack< float,  TNL::Devices::Host, int >,
@@ -40,11 +36,7 @@ using BiEllpackMatrixTypes = ::testing::Types
     TNL::Matrices::BiEllpack< float,  TNL::Devices::Host, long >,
     TNL::Matrices::BiEllpack< double, TNL::Devices::Host, long >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::BiEllpack< int,    TNL::Devices::Cuda, short >,
-    TNL::Matrices::BiEllpack< long,   TNL::Devices::Cuda, short >,
-    TNL::Matrices::BiEllpack< float,  TNL::Devices::Cuda, short >,
-    TNL::Matrices::BiEllpack< double, TNL::Devices::Cuda, short >,
-    TNL::Matrices::BiEllpack< int,    TNL::Devices::Cuda, int >,
+   ,TNL::Matrices::BiEllpack< int,    TNL::Devices::Cuda, int >,
     TNL::Matrices::BiEllpack< long,   TNL::Devices::Cuda, int >,
     TNL::Matrices::BiEllpack< float,  TNL::Devices::Cuda, int >,
     TNL::Matrices::BiEllpack< double, TNL::Devices::Cuda, int >,
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h
index 3cae12e3a..13c1ed6e0 100644
--- a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h
@@ -27,10 +27,6 @@ protected:
 // types for which MatrixTest is instantiated
 using CSRMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::CSR< int,    TNL::Devices::Host, short >,
-    TNL::Matrices::CSR< long,   TNL::Devices::Host, short >,
-    TNL::Matrices::CSR< float,  TNL::Devices::Host, short >,
-    TNL::Matrices::CSR< double, TNL::Devices::Host, short >,
     TNL::Matrices::CSR< int,    TNL::Devices::Host, int >,
     TNL::Matrices::CSR< long,   TNL::Devices::Host, int >,
     TNL::Matrices::CSR< float,  TNL::Devices::Host, int >,
@@ -40,11 +36,7 @@ using CSRMatrixTypes = ::testing::Types
     TNL::Matrices::CSR< float,  TNL::Devices::Host, long >,
     TNL::Matrices::CSR< double, TNL::Devices::Host, long >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::CSR< int,    TNL::Devices::Cuda, short >,
-    TNL::Matrices::CSR< long,   TNL::Devices::Cuda, short >,
-    TNL::Matrices::CSR< float,  TNL::Devices::Cuda, short >,
-    TNL::Matrices::CSR< double, TNL::Devices::Cuda, short >,
-    TNL::Matrices::CSR< int,    TNL::Devices::Cuda, int >,
+   ,TNL::Matrices::CSR< int,    TNL::Devices::Cuda, int >,
     TNL::Matrices::CSR< long,   TNL::Devices::Cuda, int >,
     TNL::Matrices::CSR< float,  TNL::Devices::Cuda, int >,
     TNL::Matrices::CSR< double, TNL::Devices::Cuda, int >,
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h
index a3c049910..5d304bde3 100644
--- a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h
@@ -28,10 +28,6 @@ protected:
 // types for which MatrixTest is instantiated
 using ChEllpackMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::ChunkedEllpack< int,    TNL::Devices::Host, short >,
-    TNL::Matrices::ChunkedEllpack< long,   TNL::Devices::Host, short >,
-    TNL::Matrices::ChunkedEllpack< float,  TNL::Devices::Host, short >,
-    TNL::Matrices::ChunkedEllpack< double, TNL::Devices::Host, short >,
     TNL::Matrices::ChunkedEllpack< int,    TNL::Devices::Host, int >,
     TNL::Matrices::ChunkedEllpack< long,   TNL::Devices::Host, int >,
     TNL::Matrices::ChunkedEllpack< float,  TNL::Devices::Host, int >,
@@ -41,11 +37,7 @@ using ChEllpackMatrixTypes = ::testing::Types
     TNL::Matrices::ChunkedEllpack< float,  TNL::Devices::Host, long >,
     TNL::Matrices::ChunkedEllpack< double, TNL::Devices::Host, long >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::ChunkedEllpack< int,    TNL::Devices::Cuda, short >,
-    TNL::Matrices::ChunkedEllpack< long,   TNL::Devices::Cuda, short >,
-    TNL::Matrices::ChunkedEllpack< float,  TNL::Devices::Cuda, short >,
-    TNL::Matrices::ChunkedEllpack< double, TNL::Devices::Cuda, short >,
-    TNL::Matrices::ChunkedEllpack< int,    TNL::Devices::Cuda, int >,
+   ,TNL::Matrices::ChunkedEllpack< int,    TNL::Devices::Cuda, int >,
     TNL::Matrices::ChunkedEllpack< long,   TNL::Devices::Cuda, int >,
     TNL::Matrices::ChunkedEllpack< float,  TNL::Devices::Cuda, int >,
     TNL::Matrices::ChunkedEllpack< double, TNL::Devices::Cuda, int >,
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h
index fa6b2027c..bb9fe4fc7 100644
--- a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h
@@ -27,10 +27,6 @@ protected:
 // types for which MatrixTest is instantiated
 using EllpackMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::Ellpack< int,    TNL::Devices::Host, short >,
-    TNL::Matrices::Ellpack< long,   TNL::Devices::Host, short >,
-    TNL::Matrices::Ellpack< float,  TNL::Devices::Host, short >,
-    TNL::Matrices::Ellpack< double, TNL::Devices::Host, short >,
     TNL::Matrices::Ellpack< int,    TNL::Devices::Host, int >,
     TNL::Matrices::Ellpack< long,   TNL::Devices::Host, int >,
     TNL::Matrices::Ellpack< float,  TNL::Devices::Host, int >,
@@ -40,11 +36,7 @@ using EllpackMatrixTypes = ::testing::Types
     TNL::Matrices::Ellpack< float,  TNL::Devices::Host, long >,
     TNL::Matrices::Ellpack< double, TNL::Devices::Host, long >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::Ellpack< int,    TNL::Devices::Cuda, short >,
-    TNL::Matrices::Ellpack< long,   TNL::Devices::Cuda, short >,
-    TNL::Matrices::Ellpack< float,  TNL::Devices::Cuda, short >,
-    TNL::Matrices::Ellpack< double, TNL::Devices::Cuda, short >,
-    TNL::Matrices::Ellpack< int,    TNL::Devices::Cuda, int >,
+   ,TNL::Matrices::Ellpack< int,    TNL::Devices::Cuda, int >,
     TNL::Matrices::Ellpack< long,   TNL::Devices::Cuda, int >,
     TNL::Matrices::Ellpack< float,  TNL::Devices::Cuda, int >,
     TNL::Matrices::Ellpack< double, TNL::Devices::Cuda, int >,
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h
index 7f5ad546f..8b3958384 100644
--- a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h
@@ -32,10 +32,6 @@ using SlicedEllpackType = TNL::Matrices::SlicedEllpack< Real, Device, Index, 32
 // types for which MatrixTest is instantiated
 using SlicedEllpackMatrixTypes = ::testing::Types
 <
-    SlicedEllpackType< int,     TNL::Devices::Host, short >,
-    SlicedEllpackType< long,    TNL::Devices::Host, short >,
-    SlicedEllpackType< float,   TNL::Devices::Host, short >,
-    SlicedEllpackType< double,  TNL::Devices::Host, short >,
     SlicedEllpackType< int,     TNL::Devices::Host, int   >,
     SlicedEllpackType< long,    TNL::Devices::Host, int   >,
     SlicedEllpackType< float,   TNL::Devices::Host, int   >,
@@ -45,11 +41,7 @@ using SlicedEllpackMatrixTypes = ::testing::Types
     SlicedEllpackType< float,   TNL::Devices::Host, long  >,
     SlicedEllpackType< double,  TNL::Devices::Host, long  >
 #ifdef HAVE_CUDA
-   ,SlicedEllpackType< int,     TNL::Devices::Cuda, short >,
-    SlicedEllpackType< long,    TNL::Devices::Cuda, short >,
-    SlicedEllpackType< float,   TNL::Devices::Cuda, short >,
-    SlicedEllpackType< double,  TNL::Devices::Cuda, short >,
-    SlicedEllpackType< int,     TNL::Devices::Cuda, int   >,
+   ,SlicedEllpackType< int,     TNL::Devices::Cuda, int   >,
     SlicedEllpackType< long,    TNL::Devices::Cuda, int   >,
     SlicedEllpackType< float,   TNL::Devices::Cuda, int   >,
     SlicedEllpackType< double,  TNL::Devices::Cuda, int   >,
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
index f029c3bc7..a72d548f5 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
@@ -29,10 +29,6 @@ protected:
 // types for which MatrixTest is instantiated
 using CSRMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
@@ -42,11 +38,7 @@ using CSRMatrixTypes = ::testing::Types
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
index 2bf5fe20d..2a890e694 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
@@ -40,10 +40,6 @@ using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, In
 // types for which MatrixTest is instantiated
 using EllpackMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
     TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
     TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
@@ -53,11 +49,7 @@ using EllpackMatrixTypes = ::testing::Types
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
     TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
     TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
     TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
index 190839fd5..17b48dcf4 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
@@ -41,10 +41,6 @@ using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Devic
 // types for which MatrixTest is instantiated
 using SlicedEllpackMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
     TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
     TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
@@ -54,11 +50,7 @@ using SlicedEllpackMatrixTypes = ::testing::Types
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
     TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
     TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
     TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
-- 
GitLab


From 7e9a8ca0410dc0b4c8748f5e3c647c90d411b45d Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 3 Mar 2020 10:56:52 +0100
Subject: [PATCH 170/179] Restoration of protected members in Matrix.

---
 src/TNL/Matrices/Dense.hpp | 2 +-
 src/TNL/Matrices/Matrix.h  | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 0d7037b1f..346c26ed8 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -918,7 +918,7 @@ operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealA
    this->setLike( matrix );
    if( RowMajorOrder == RHSRowMajorOrder )
    {
-      this->values = matrix.values;
+      this->values = matrix.getValues();
       return *this;
    }
 
diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index cf61f9efa..3c0fd8a9b 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -126,8 +126,7 @@ public:
    __cuda_callable__
    Index getValuesSize() const;
 
-   // TODO: restore this
-   //protected:
+   protected:
 
    IndexType rows, columns, numberOfColors;
 
-- 
GitLab


From ba0375a7929c1956014954a0c4e8554ce4b42ed2 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 3 Mar 2020 10:57:45 +0100
Subject: [PATCH 171/179] Code formatting in DenseMatrixView.hpp

---
 src/TNL/Matrices/DenseMatrixView.hpp | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp
index 00ca5edc2..d61c50794 100644
--- a/src/TNL/Matrices/DenseMatrixView.hpp
+++ b/src/TNL/Matrices/DenseMatrixView.hpp
@@ -374,8 +374,9 @@ template< typename Real,
           bool RowMajorOrder >
    template< typename Vector >
 __cuda_callable__
-typename Vector::RealType DenseMatrixView< Real, Device, Index, RowMajorOrder >::rowVectorProduct( const IndexType row,
-                                                                                   const Vector& vector ) const
+typename Vector::RealType
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+rowVectorProduct( const IndexType row, const Vector& vector ) const
 {
    RealType sum( 0.0 );
    // TODO: Fix this
@@ -390,8 +391,9 @@ template< typename Real,
           bool RowMajorOrder >
    template< typename InVector,
              typename OutVector >
-void DenseMatrixView< Real, Device, Index, RowMajorOrder >::vectorProduct( const InVector& inVector,
-                                                           OutVector& outVector ) const
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+vectorProduct( const InVector& inVector, OutVector& outVector ) const
 {
    TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns count differs with input vector size." );
    TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows count differs with output vector size." );
@@ -416,9 +418,11 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder >
    template< typename Matrix >
-void DenseMatrixView< Real, Device, Index, RowMajorOrder >::addMatrix( const Matrix& matrix,
-                                              const RealType& matrixMultiplicator,
-                                              const RealType& thisMatrixMultiplicator )
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+addMatrix( const Matrix& matrix,
+           const RealType& matrixMultiplicator,
+           const RealType& thisMatrixMultiplicator )
 {
    TNL_ASSERT( this->getColumns() == matrix.getColumns() &&
               this->getRows() == matrix.getRows(),
-- 
GitLab


From af032c5e55a8e7712447cd4794d27212f561561d Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 3 Mar 2020 10:58:34 +0100
Subject: [PATCH 172/179] Moving CUDA kernels from DenseMatrixView.hpp to
 details/DenseMatrix.h

---
 src/TNL/Matrices/DenseMatrixView.hpp   | 254 -------------------------
 src/TNL/Matrices/details/DenseMatrix.h | 253 ++++++++++++++++++++++++
 2 files changed, 253 insertions(+), 254 deletions(-)

diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp
index d61c50794..50f30d889 100644
--- a/src/TNL/Matrices/DenseMatrixView.hpp
+++ b/src/TNL/Matrices/DenseMatrixView.hpp
@@ -437,107 +437,6 @@ addMatrix( const Matrix& matrix,
       this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values;
 }
 
-#ifdef HAVE_CUDA_______________
-template< typename Real,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator,
-          typename Matrix1,
-          typename Matrix2,
-          int tileDim,
-          int tileRowBlockSize >
-__global__ void DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index, RowMajorOrder >* resultMatrix,
-                                                   const Matrix1* matrixA,
-                                                   const Matrix2* matrixB,
-                                                   const Real matrixAMultiplicator,
-                                                   const Real matrixBMultiplicator,
-                                                   const Index gridIdx_x,
-                                                   const Index gridIdx_y )
-{
-   /****
-    * Here we compute product C = A * B. To profit from the fast
-    * shared memory we do it by tiles.
-    */
-
-   typedef Index IndexType;
-   typedef Real RealType;
-   __shared__ Real tileA[ tileDim*tileDim ];
-   __shared__ Real tileB[ tileDim*tileDim ];
-   __shared__ Real tileC[ tileDim*tileDim ];
-
-   const IndexType& matrixARows = matrixA->getRows();
-   const IndexType& matrixAColumns = matrixA->getColumns();
-   const IndexType& matrixBRows = matrixB->getRows();
-   const IndexType& matrixBColumns = matrixB->getColumns();
-
-   /****
-    * Reset the tile C
-    */
-   for( IndexType row = 0; row < tileDim; row += tileRowBlockSize )
-      tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] = 0.0;
-
-   /****
-    * Compute the result tile coordinates
-    */
-   const IndexType resultTileRow = ( gridIdx_y*gridDim.y + blockIdx.y )*tileDim;
-   const IndexType resultTileColumn = ( gridIdx_x*gridDim.x + blockIdx.x )*tileDim;
-
-   /****
-    * Sum over the matrix tiles
-    */
-   for( IndexType i = 0; i < matrixAColumns; i += tileDim )
-   {
-      for( IndexType row = 0; row < tileDim; row += tileRowBlockSize )
-      {
-         const IndexType matrixARow = resultTileRow + threadIdx.y + row;
-         const IndexType matrixAColumn = i + threadIdx.x;
-         if( matrixARow < matrixARows && matrixAColumn < matrixAColumns )
-            tileA[ (threadIdx.y + row)*tileDim + threadIdx.x ] =
-               matrixAMultiplicator * matrixA->getElementFast( matrixARow,  matrixAColumn );
-
-         const IndexType matrixBRow = i + threadIdx.y + row;
-         const IndexType matrixBColumn = resultTileColumn + threadIdx.x;
-         if( matrixBRow < matrixBRows && matrixBColumn < matrixBColumns )
-            tileB[ (threadIdx.y + row)*tileDim + threadIdx.x ] =
-               matrixBMultiplicator * matrixB->getElementFast( matrixBRow, matrixBColumn );
-      }
-      __syncthreads();
-
-      const IndexType tileALastRow = tnlCudaMin( tileDim, matrixARows - resultTileRow );
-      const IndexType tileALastColumn = tnlCudaMin( tileDim, matrixAColumns - i );
-      const IndexType tileBLastRow = tnlCudaMin( tileDim, matrixBRows - i );
-      const IndexType tileBLastColumn =
-         tnlCudaMin( tileDim, matrixBColumns - resultTileColumn );
-
-      for( IndexType row = 0; row < tileALastRow; row += tileRowBlockSize )
-      {
-         RealType sum( 0.0 );
-         for( IndexType j = 0; j < tileALastColumn; j++ )
-            sum += tileA[ ( threadIdx.y + row )*tileDim + j ]*
-                      tileB[ j*tileDim + threadIdx.x ];
-         tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] += sum;
-      }
-      __syncthreads();
-   }
-
-   /****
-    * Write the result tile to the result matrix
-    */
-   const IndexType& matrixCRows = resultMatrix->getRows();
-   const IndexType& matrixCColumns = resultMatrix->getColumns();
-   for( IndexType row = 0; row < tileDim; row += tileRowBlockSize )
-   {
-      const IndexType matrixCRow = resultTileRow + row + threadIdx.y;
-      const IndexType matrixCColumn = resultTileColumn + threadIdx.x;
-      if( matrixCRow < matrixCRows && matrixCColumn < matrixCColumns )
-         resultMatrix->setElementFast( matrixCRow,
-                                       matrixCColumn,
-                                       tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] );
-   }
-
-}
-#endif
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -627,159 +526,6 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMatrixProduct( co
    }
 }
 
-#ifdef HAVE_CUDA________________________
-template< typename Real,
-          typename Index,
-          typename Matrix,
-          bool RowMajorOrder,
-          typename RealAllocator,
-          int tileDim,
-          int tileRowBlockSize >
-__global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix,
-                                                          const Matrix* inputMatrix,
-                                                          const Real matrixMultiplicator,
-                                                          const Index gridIdx_x,
-                                                          const Index gridIdx_y )
-{
-   __shared__ Real tile[ tileDim*tileDim ];
-
-   const Index columns = inputMatrix->getColumns();
-   const Index rows = inputMatrix->getRows();
-
-
-   /****
-    * Diagonal mapping of the CUDA blocks
-    */
-   Index blockIdx_x, blockIdx_y;
-   if( columns == rows )
-   {
-      blockIdx_y = blockIdx.x;
-      blockIdx_x = (blockIdx.x+blockIdx.y)%gridDim.x;
-   }
-   else
-   {
-      Index bID = blockIdx.x + gridDim.x*blockIdx.y;
-      blockIdx_y = bID % gridDim.y;
-      blockIdx_x = ( ( bID / gridDim.y ) + blockIdx_y ) % gridDim.x;
-   }
-
-   /****
-    * Read the tile to the shared memory
-    */
-   const Index readRowPosition =
-      ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.y;
-   const Index readColumnPosition =
-      ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.x;
-   for( Index rowBlock = 0;
-        rowBlock < tileDim;
-        rowBlock += tileRowBlockSize )
-   {
-      tile[ Cuda::getInterleaving( threadIdx.x*tileDim +  threadIdx.y + rowBlock ) ] =
-               inputMatrix->getElementFast( readColumnPosition,
-                                            readRowPosition + rowBlock );
-   }
-   __syncthreads();
-
-   /****
-    * Write the tile to the global memory
-    */
-   const Index writeRowPosition =
-      ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.y;
-   const Index writeColumnPosition =
-      ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.x;
-   for( Index rowBlock = 0;
-        rowBlock < tileDim;
-        rowBlock += tileRowBlockSize )
-   {
-      resultMatrix->setElementFast( writeColumnPosition,
-                                    writeRowPosition + rowBlock,
-                                    matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] );
-
-   }
-
-}
-
-template< typename Real,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator,
-          typename Matrix,
-          int tileDim,
-          int tileRowBlockSize >
-__global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix,
-                                                             const Matrix* inputMatrix,
-                                                             const Real matrixMultiplicator,
-                                                             const Index gridIdx_x,
-                                                             const Index gridIdx_y )
-{
-   __shared__ Real tile[ tileDim*tileDim ];
-
-   const Index columns = inputMatrix->getColumns();
-   const Index rows = inputMatrix->getRows();
-
-   /****
-    * Diagonal mapping of the CUDA blocks
-    */
-   Index blockIdx_x, blockIdx_y;
-   if( columns == rows )
-   {
-      blockIdx_y = blockIdx.x;
-      blockIdx_x = (blockIdx.x+blockIdx.y)%gridDim.x;
-   }
-   else
-   {
-      Index bID = blockIdx.x + gridDim.x*blockIdx.y;
-      blockIdx_y = bID % gridDim.y;
-      blockIdx_x = ( ( bID / gridDim.y ) + blockIdx_y ) % gridDim.x;
-   }
-
-   /****
-    * Read the tile to the shared memory
-    */
-   const Index readRowPosition =
-      ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.y;
-   const Index readColumnPosition =
-      ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.x;
-   if( readColumnPosition < columns )
-   {
-      const Index readOffset = readRowPosition * columns + readColumnPosition;
-      for( Index rowBlock = 0;
-           rowBlock < tileDim;
-           rowBlock += tileRowBlockSize )
-      {
-         if( readRowPosition + rowBlock < rows )
-            tile[ Cuda::getInterleaving( threadIdx.x*tileDim +  threadIdx.y + rowBlock ) ] =
-               inputMatrix->getElementFast( readColumnPosition,
-                                            readRowPosition + rowBlock );
-      }
-   }
-   __syncthreads();
-
-   /****
-    * Write the tile to the global memory
-    */
-   const Index writeRowPosition =
-      ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.y;
-   const Index writeColumnPosition =
-      ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.x;
-   if( writeColumnPosition < rows )
-   {
-      const Index writeOffset = writeRowPosition * rows + writeColumnPosition;
-      for( Index rowBlock = 0;
-           rowBlock < tileDim;
-           rowBlock += tileRowBlockSize )
-      {
-         if( writeRowPosition + rowBlock < columns )
-            resultMatrix->setElementFast( writeColumnPosition,
-                                          writeRowPosition + rowBlock,
-                                          matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] );
-      }
-   }
-
-}
-
-
-#endif
 
 template< typename Real,
           typename Device,
diff --git a/src/TNL/Matrices/details/DenseMatrix.h b/src/TNL/Matrices/details/DenseMatrix.h
index 813e58bc4..96930b386 100644
--- a/src/TNL/Matrices/details/DenseMatrix.h
+++ b/src/TNL/Matrices/details/DenseMatrix.h
@@ -62,6 +62,259 @@ class DenseDeviceDependentCode< Devices::Cuda >
       }
 };
 
+#ifdef HAVE_CUDA
+template< typename Real,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename Matrix1,
+          typename Matrix2,
+          int tileDim,
+          int tileRowBlockSize >
+__global__ void
+DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index, RowMajorOrder >* resultMatrix,
+                          const Matrix1* matrixA,
+                          const Matrix2* matrixB,
+                          const Real matrixAMultiplicator,
+                          const Real matrixBMultiplicator,
+                          const Index gridIdx_x,
+                          const Index gridIdx_y )
+{
+   /****
+    * Here we compute product C = A * B. To profit from the fast
+    * shared memory we do it by tiles.
+    */
+
+   typedef Index IndexType;
+   typedef Real RealType;
+   __shared__ Real tileA[ tileDim*tileDim ];
+   __shared__ Real tileB[ tileDim*tileDim ];
+   __shared__ Real tileC[ tileDim*tileDim ];
+
+   const IndexType& matrixARows = matrixA->getRows();
+   const IndexType& matrixAColumns = matrixA->getColumns();
+   const IndexType& matrixBRows = matrixB->getRows();
+   const IndexType& matrixBColumns = matrixB->getColumns();
+
+   /****
+    * Reset the tile C
+    */
+   for( IndexType row = 0; row < tileDim; row += tileRowBlockSize )
+      tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] = 0.0;
+
+   /****
+    * Compute the result tile coordinates
+    */
+   const IndexType resultTileRow = ( gridIdx_y*gridDim.y + blockIdx.y )*tileDim;
+   const IndexType resultTileColumn = ( gridIdx_x*gridDim.x + blockIdx.x )*tileDim;
+
+   /****
+    * Sum over the matrix tiles
+    */
+   for( IndexType i = 0; i < matrixAColumns; i += tileDim )
+   {
+      for( IndexType row = 0; row < tileDim; row += tileRowBlockSize )
+      {
+         const IndexType matrixARow = resultTileRow + threadIdx.y + row;
+         const IndexType matrixAColumn = i + threadIdx.x;
+         if( matrixARow < matrixARows && matrixAColumn < matrixAColumns )
+            tileA[ (threadIdx.y + row)*tileDim + threadIdx.x ] =
+               matrixAMultiplicator * matrixA->getElementFast( matrixARow,  matrixAColumn );
+
+         const IndexType matrixBRow = i + threadIdx.y + row;
+         const IndexType matrixBColumn = resultTileColumn + threadIdx.x;
+         if( matrixBRow < matrixBRows && matrixBColumn < matrixBColumns )
+            tileB[ (threadIdx.y + row)*tileDim + threadIdx.x ] =
+               matrixBMultiplicator * matrixB->getElementFast( matrixBRow, matrixBColumn );
+      }
+      __syncthreads();
+
+      const IndexType tileALastRow = tnlCudaMin( tileDim, matrixARows - resultTileRow );
+      const IndexType tileALastColumn = tnlCudaMin( tileDim, matrixAColumns - i );
+      const IndexType tileBLastRow = tnlCudaMin( tileDim, matrixBRows - i );
+      const IndexType tileBLastColumn =
+         tnlCudaMin( tileDim, matrixBColumns - resultTileColumn );
+
+      for( IndexType row = 0; row < tileALastRow; row += tileRowBlockSize )
+      {
+         RealType sum( 0.0 );
+         for( IndexType j = 0; j < tileALastColumn; j++ )
+            sum += tileA[ ( threadIdx.y + row )*tileDim + j ]*
+                      tileB[ j*tileDim + threadIdx.x ];
+         tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] += sum;
+      }
+      __syncthreads();
+   }
+
+   /****
+    * Write the result tile to the result matrix
+    */
+   const IndexType& matrixCRows = resultMatrix->getRows();
+   const IndexType& matrixCColumns = resultMatrix->getColumns();
+   for( IndexType row = 0; row < tileDim; row += tileRowBlockSize )
+   {
+      const IndexType matrixCRow = resultTileRow + row + threadIdx.y;
+      const IndexType matrixCColumn = resultTileColumn + threadIdx.x;
+      if( matrixCRow < matrixCRows && matrixCColumn < matrixCColumns )
+         resultMatrix->setElementFast( matrixCRow,
+                                       matrixCColumn,
+                                       tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] );
+   }
+
+}
+
+template< typename Real,
+          typename Index,
+          typename Matrix,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          int tileDim,
+          int tileRowBlockSize >
+__global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix,
+                                                          const Matrix* inputMatrix,
+                                                          const Real matrixMultiplicator,
+                                                          const Index gridIdx_x,
+                                                          const Index gridIdx_y )
+{
+   __shared__ Real tile[ tileDim*tileDim ];
+
+   const Index columns = inputMatrix->getColumns();
+   const Index rows = inputMatrix->getRows();
+
+
+   /****
+    * Diagonal mapping of the CUDA blocks
+    */
+   Index blockIdx_x, blockIdx_y;
+   if( columns == rows )
+   {
+      blockIdx_y = blockIdx.x;
+      blockIdx_x = (blockIdx.x+blockIdx.y)%gridDim.x;
+   }
+   else
+   {
+      Index bID = blockIdx.x + gridDim.x*blockIdx.y;
+      blockIdx_y = bID % gridDim.y;
+      blockIdx_x = ( ( bID / gridDim.y ) + blockIdx_y ) % gridDim.x;
+   }
+
+   /****
+    * Read the tile to the shared memory
+    */
+   const Index readRowPosition =
+      ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.y;
+   const Index readColumnPosition =
+      ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.x;
+   for( Index rowBlock = 0;
+        rowBlock < tileDim;
+        rowBlock += tileRowBlockSize )
+   {
+      tile[ Cuda::getInterleaving( threadIdx.x*tileDim +  threadIdx.y + rowBlock ) ] =
+               inputMatrix->getElementFast( readColumnPosition,
+                                            readRowPosition + rowBlock );
+   }
+   __syncthreads();
+
+   /****
+    * Write the tile to the global memory
+    */
+   const Index writeRowPosition =
+      ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.y;
+   const Index writeColumnPosition =
+      ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.x;
+   for( Index rowBlock = 0;
+        rowBlock < tileDim;
+        rowBlock += tileRowBlockSize )
+   {
+      resultMatrix->setElementFast( writeColumnPosition,
+                                    writeRowPosition + rowBlock,
+                                    matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] );
+
+   }
+
+}
+
+template< typename Real,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename Matrix,
+          int tileDim,
+          int tileRowBlockSize >
+__global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix,
+                                                             const Matrix* inputMatrix,
+                                                             const Real matrixMultiplicator,
+                                                             const Index gridIdx_x,
+                                                             const Index gridIdx_y )
+{
+   __shared__ Real tile[ tileDim*tileDim ];
+
+   const Index columns = inputMatrix->getColumns();
+   const Index rows = inputMatrix->getRows();
+
+   /****
+    * Diagonal mapping of the CUDA blocks
+    */
+   Index blockIdx_x, blockIdx_y;
+   if( columns == rows )
+   {
+      blockIdx_y = blockIdx.x;
+      blockIdx_x = (blockIdx.x+blockIdx.y)%gridDim.x;
+   }
+   else
+   {
+      Index bID = blockIdx.x + gridDim.x*blockIdx.y;
+      blockIdx_y = bID % gridDim.y;
+      blockIdx_x = ( ( bID / gridDim.y ) + blockIdx_y ) % gridDim.x;
+   }
+
+   /****
+    * Read the tile to the shared memory
+    */
+   const Index readRowPosition =
+      ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.y;
+   const Index readColumnPosition =
+      ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.x;
+   if( readColumnPosition < columns )
+   {
+      const Index readOffset = readRowPosition * columns + readColumnPosition;
+      for( Index rowBlock = 0;
+           rowBlock < tileDim;
+           rowBlock += tileRowBlockSize )
+      {
+         if( readRowPosition + rowBlock < rows )
+            tile[ Cuda::getInterleaving( threadIdx.x*tileDim +  threadIdx.y + rowBlock ) ] =
+               inputMatrix->getElementFast( readColumnPosition,
+                                            readRowPosition + rowBlock );
+      }
+   }
+   __syncthreads();
+
+   /****
+    * Write the tile to the global memory
+    */
+   const Index writeRowPosition =
+      ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.y;
+   const Index writeColumnPosition =
+      ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.x;
+   if( writeColumnPosition < rows )
+   {
+      const Index writeOffset = writeRowPosition * rows + writeColumnPosition;
+      for( Index rowBlock = 0;
+           rowBlock < tileDim;
+           rowBlock += tileRowBlockSize )
+      {
+         if( writeRowPosition + rowBlock < columns )
+            resultMatrix->setElementFast( writeColumnPosition,
+                                          writeRowPosition + rowBlock,
+                                          matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] );
+      }
+   }
+
+}
+
+#endif
+
       } //namespace details
    } //namepsace Matrices
 } //namespace TNL
\ No newline at end of file
-- 
GitLab


From 41348dac910dad6d84ba9ebd111b0f05b038d942 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 3 Mar 2020 10:59:17 +0100
Subject: [PATCH 173/179] Deleted unused dense matrix device dependent code.

---
 src/TNL/Matrices/DenseMatrixView.hpp | 46 ----------------------------
 1 file changed, 46 deletions(-)

diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp
index 50f30d889..01415ec21 100644
--- a/src/TNL/Matrices/DenseMatrixView.hpp
+++ b/src/TNL/Matrices/DenseMatrixView.hpp
@@ -699,51 +699,5 @@ Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getElementIndex( co
    return this->segments.getGlobalIndex( row, column );
 }
 
-/*template<>
-class DenseDeviceDependentCode< Devices::Host >
-{
-   public:
-
-      typedef Devices::Host Device;
-
-      template< typename Real,
-                typename Index,
-                bool RowMajorOrder,
-                typename RealAllocator,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-#ifdef HAVE_OPENMP
-#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
-#endif
-         for( Index row = 0; row < matrix.getRows(); row ++ )
-            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
-      }
-};
-
-template<>
-class DenseDeviceDependentCode< Devices::Cuda >
-{
-   public:
-
-      typedef Devices::Cuda Device;
-
-      template< typename Real,
-                typename Index,
-                bool RowMajorOrder,
-                typename RealAllocator,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-         MatrixVectorProductCuda( matrix, inVector, outVector );
-      }
-};*/
-
 } // namespace Matrices
 } // namespace TNL
-- 
GitLab


From 939b17f9c5bac758cfd35bc57a48f4342f58fa64 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 3 Mar 2020 13:21:17 +0100
Subject: [PATCH 174/179] Removed useless virtual methods from Matrix.

---
 src/TNL/Matrices/Legacy/AdEllpack.h          | 33 +++++++-------
 src/TNL/Matrices/Legacy/AdEllpack_impl.h     | 10 +++++
 src/TNL/Matrices/Legacy/BiEllpack.h          | 15 ++++---
 src/TNL/Matrices/Legacy/BiEllpack_impl.h     | 46 ++++++++++++--------
 src/TNL/Matrices/Legacy/CSR.h                | 11 +++--
 src/TNL/Matrices/Legacy/CSR_impl.h           | 22 +++++++---
 src/TNL/Matrices/Legacy/Ellpack.h            | 13 +++---
 src/TNL/Matrices/Legacy/Ellpack_impl.h       | 22 +++++++---
 src/TNL/Matrices/Legacy/SlicedEllpack.h      |  3 ++
 src/TNL/Matrices/Legacy/SlicedEllpack_impl.h | 13 +++++-
 src/TNL/Matrices/Matrix.h                    | 31 ++-----------
 src/TNL/Matrices/Matrix.hpp                  | 21 ---------
 12 files changed, 130 insertions(+), 110 deletions(-)

diff --git a/src/TNL/Matrices/Legacy/AdEllpack.h b/src/TNL/Matrices/Legacy/AdEllpack.h
index 3d2db7b96..1135084ee 100644
--- a/src/TNL/Matrices/Legacy/AdEllpack.h
+++ b/src/TNL/Matrices/Legacy/AdEllpack.h
@@ -10,8 +10,8 @@
 
 /****
  * This class implements AdELL format from:
- * 
- * Maggioni M., Berger-Wolf T., 
+ *
+ * Maggioni M., Berger-Wolf T.,
  * AdELL: An Adaptive Warp-Balancing ELL Format for Efficient Sparse Matrix-Vector Multiplication on GPUs,
  * In proceedings of 42nd International Conference on Parallel Processing, 2013.
  */
@@ -33,7 +33,7 @@ struct warpInfo
     using RealType = typename MatrixType::RealType;
     using DeviceType = typename MatrixType::DeviceType;
     using IndexType = typename MatrixType::IndexType;
-    
+
     IndexType offset;
     IndexType rowOffset;
     IndexType localLoad;
@@ -47,7 +47,7 @@ template< typename MatrixType >
 class warpList
 {
 public:
-    
+
     using RealType = typename MatrixType::RealType;
     using DeviceType = typename MatrixType::DeviceType;
     using IndexType = typename MatrixType::IndexType;
@@ -74,7 +74,7 @@ public:
     { return this->tail; }
 
     ~warpList();
-    
+
     void printList()
     {
         if( this->getHead() == this->getTail() )
@@ -114,7 +114,7 @@ private:
    // friend class will be needed for templated assignment operators
    template< typename Real2, typename Device2, typename Index2 >
    friend class AdEllpack;
-   
+
 public:
 
     typedef Real RealType;
@@ -122,6 +122,7 @@ public:
     typedef Index IndexType;
     typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector;
     typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
+    typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView;
 
     template< typename _Real = Real,
               typename _Device = Device,
@@ -132,6 +133,8 @@ public:
 
     void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
+    void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
+
     IndexType getWarp( const IndexType row ) const;
 
     IndexType getInWarpOffset( const IndexType row,
@@ -143,7 +146,7 @@ public:
     void setLike( const AdEllpack< Real2, Device2, Index2 >& matrix );
 
     void reset();
-    
+
     template< typename Real2, typename Device2, typename Index2 >
     bool operator == ( const AdEllpack< Real2, Device2, Index2 >& matrix ) const;
 
@@ -186,7 +189,7 @@ public:
               typename OutVector >
     void vectorProduct( const InVector& inVector,
                         OutVector& outVector ) const;
-    
+
     // copy assignment
     AdEllpack& operator=( const AdEllpack& matrix );
 
@@ -194,7 +197,7 @@ public:
     template< typename Real2, typename Device2, typename Index2,
              typename = typename Enabler< Device2 >::type >
     AdEllpack& operator=( const AdEllpack< Real2, Device2, Index2 >& matrix );
-    
+
     void save( File& file ) const;
 
     void load( File& file );
@@ -242,29 +245,29 @@ public:
    void spmvCuda4( const InVector& inVector,
                    OutVector& outVector,
                    const int gridIdx ) const;
-   
+
    template< typename InVector,
           typename OutVector >
    __device__
    void spmvCuda8( const InVector& inVector,
                    OutVector& outVector,
                    const int gridIdx ) const;
-   
+
    template< typename InVector,
           typename OutVector >
    __device__
    void spmvCuda16( const InVector& inVector,
                     OutVector& outVector,
-                    const int gridIdx ) const;   
+                    const int gridIdx ) const;
 
    template< typename InVector,
           typename OutVector >
    __device__
    void spmvCuda32( const InVector& inVector,
                     OutVector& outVector,
-                    const int gridIdx ) const;   
-   
-   
+                    const int gridIdx ) const;
+
+
 #endif
 
 
diff --git a/src/TNL/Matrices/Legacy/AdEllpack_impl.h b/src/TNL/Matrices/Legacy/AdEllpack_impl.h
index 234e18f94..242a3c81f 100644
--- a/src/TNL/Matrices/Legacy/AdEllpack_impl.h
+++ b/src/TNL/Matrices/Legacy/AdEllpack_impl.h
@@ -220,6 +220,16 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
     }
 }
 
+template< typename Real,
+          typename Device,
+          typename Index >
+void AdEllpack< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
+{
+   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
+   for( IndexType row = 0; row < this->getRows(); row++ )
+      rowLengths.setElement( row, this->getRowLength( row ) );
+}
+
 template< typename Real,
           typename Device,
           typename Index >
diff --git a/src/TNL/Matrices/Legacy/BiEllpack.h b/src/TNL/Matrices/Legacy/BiEllpack.h
index fe3fd9e11..1a92581c7 100644
--- a/src/TNL/Matrices/Legacy/BiEllpack.h
+++ b/src/TNL/Matrices/Legacy/BiEllpack.h
@@ -32,7 +32,7 @@ template< typename Real, typename Device, typename Index >
 class BiEllpack : public Sparse< Real, Device, Index >
 {
 private:
-    
+
     // convenient template alias for controlling the selection of copy-assignment operator
     template< typename Device2 >
     using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
@@ -40,13 +40,14 @@ private:
     // friend class will be needed for templated assignment operators
     template< typename Real2, typename Device2, typename Index2 >
     friend class BiEllpack;
-    
+
 public:
 	typedef Real RealType;
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
+   typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView;
 	typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
 	typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector;
 
@@ -62,15 +63,17 @@ public:
 
    void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
+   void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
+
 	IndexType getRowLength( const IndexType row ) const;
 
 	template< typename Real2,
 			  typename Device2,
 			  typename Index2 >
 	void setLike( const BiEllpack< Real2, Device2, Index2 >& matrix );
-        
+
         void reset();
-        
+
         template< typename Real2, typename Device2, typename Index2 >
         bool operator == ( const BiEllpack< Real2, Device2, Index2 >& matrix ) const;
 
@@ -142,7 +145,7 @@ public:
 	IndexType getNumberOfGroups( const IndexType row ) const;
 
 	bool vectorProductTest() const;
-        
+
         // copy assignment
         BiEllpack& operator=( const BiEllpack& matrix );
 
@@ -160,7 +163,7 @@ public:
 	void load( const String& fileName );
 
 	void print( std::ostream& str ) const;
-        
+
         void printValues() const;
 
 	void performRowBubbleSort( Containers::Vector< Index, Device, Index >& tempRowLengths );
diff --git a/src/TNL/Matrices/Legacy/BiEllpack_impl.h b/src/TNL/Matrices/Legacy/BiEllpack_impl.h
index 36732a39a..6db2ed609 100644
--- a/src/TNL/Matrices/Legacy/BiEllpack_impl.h
+++ b/src/TNL/Matrices/Legacy/BiEllpack_impl.h
@@ -78,9 +78,9 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView constRowLengths )
     CompressedRowLengthsVector rowLengths;
     rowLengths.reset();
     rowLengths.setLike( constRowLengths );
-    
+
     rowLengths = constRowLengths;
-    
+
     if( this->getRows() % this->warpSize != 0 )
             this->setVirtualRows( this->getRows() + this->warpSize - ( this->getRows() % this->warpSize ) );
     else
@@ -88,7 +88,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView constRowLengths )
     IndexType strips = this->virtualRows / this->warpSize;
     this->rowPermArray.setSize( this->rows );
     this->groupPointers.setSize( strips * ( this->logWarpSize + 1 ) + 1 );
-    
+
     this->groupPointers.setValue( 0 );
 
     DeviceDependentCode::performRowBubbleSort( *this, rowLengths );
@@ -103,6 +103,16 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView constRowLengths )
     return this->allocateMatrixElements( this->warpSize * this->groupPointers.getElement( strips * ( this->logWarpSize + 1 ) ) );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index >
+void BiEllpack< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
+{
+   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
+   for( IndexType row = 0; row < this->getRows(); row++ )
+      rowLengths.setElement( row, this->getRowLength( row ) );
+}
+
 template< typename Real,
           typename Device,
           typename Index >
@@ -128,7 +138,7 @@ Index BiEllpack< Real, Device, Index >::getNumberOfGroups( const IndexType row )
 	IndexType strip = row / this->warpSize;
 	IndexType rowStripPermutation = this->rowPermArray.getElement( row ) - this->warpSize * strip;
 	IndexType numberOfGroups = this->logWarpSize + 1;
-	IndexType bisection = 1;        
+	IndexType bisection = 1;
 	for( IndexType i = 0; i < this->logWarpSize + 1; i++ )
 	{
 		if( rowStripPermutation < bisection )
@@ -148,7 +158,7 @@ template< typename Real,
 		  typename Index >
 Index BiEllpack< Real, Device, Index >::getRowLength( const IndexType row ) const
 {
-	TNL_ASSERT( row >= 0 && row < this->getRows(), 
+	TNL_ASSERT( row >= 0 && row < this->getRows(),
                     std::cerr << "row = " << row << " this->getRows() = " << this->getRows() );
 
 	const IndexType strip = row / this->warpSize;
@@ -182,7 +192,7 @@ template< typename Real,
 			  typename Device2,
 			  typename Index2 >
 void BiEllpack< Real, Device, Index >::setLike( const BiEllpack< Real2, Device2, Index2 >& matrix )
-{        
+{
 	Sparse< Real, Device, Index >::setLike( matrix );
 	this->rowPermArray.setLike( matrix.rowPermArray );
 	this->groupPointers.setLike( matrix.groupPointers );
@@ -212,9 +222,9 @@ bool BiEllpack< Real, Device, Index >::operator == ( const BiEllpack< Real2, Dev
                     << " matrix.getRows() = " << matrix.getRows()
                     << " this->getColumns() = " << this->getColumns()
                     << " matrix.getColumns() = " << matrix.getColumns() );
-   
+
    TNL_ASSERT_TRUE( false, "operator == is not yet implemented for BiEllpack.");
-   
+
    // TODO: implement this
    return false;
 }
@@ -284,10 +294,10 @@ bool BiEllpack< Real, Device, Index >::addElement( const IndexType row,
                                                               const RealType& value,
                                                               const RealType& thisElementMultiplicator )
 {
-    const IndexType strip = row / this->warpSize;    
-    const IndexType groupBegin = strip * ( this->logWarpSize + 1 );    
-    const IndexType rowStripPerm = this->rowPermArray.getElement( row ) - strip * this->warpSize;    
-    IndexType elementPtr = this->groupPointers.getElement( groupBegin ) * this->warpSize + rowStripPerm;    
+    const IndexType strip = row / this->warpSize;
+    const IndexType groupBegin = strip * ( this->logWarpSize + 1 );
+    const IndexType rowStripPerm = this->rowPermArray.getElement( row ) - strip * this->warpSize;
+    IndexType elementPtr = this->groupPointers.getElement( groupBegin ) * this->warpSize + rowStripPerm;
     IndexType rowMultiplicator = 1;
     IndexType step = this->warpSize;
 
@@ -685,7 +695,7 @@ BiEllpack< Real, Device, Index >::operator=( const BiEllpack< Real2, Device2, In
                   "unknown device" );
    static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
                   "unknown device" );
-   
+
    this->setLike( matrix );
    this->values = matrix.values;
    this->columnIndexes = matrix.columnIndexes;
@@ -777,14 +787,14 @@ void BiEllpack< Real, Device, Index >::printValues() const
 {
     for( Index i = 0; i < this->values.getSize(); i++ ) {
         if( this->columnIndexes.getElement( i ) != this->getColumns() )
-            std::cout << "values.getElement( " << i << " ) = " << this->values.getElement( i ) 
+            std::cout << "values.getElement( " << i << " ) = " << this->values.getElement( i )
              << "\tcolumnIndexes.getElement( " << i << " ) = " << this->columnIndexes.getElement( i ) << std::endl;
     }
-    
+
     for( Index i = 0; i < this->rowPermArray.getSize(); i++ ) {
         std::cout << "rowPermArray[ " << i << " ] = " << this->rowPermArray.getElement( i ) << std::endl;
     }
-    
+
     for( Index i = 0; i < this->groupPointers.getSize(); i++ ) {
         std::cout << "groupPointers[ " << i << " ] = " << this->groupPointers.getElement( i ) << std::endl;
     }
@@ -1146,7 +1156,7 @@ void BiEllpack< Real, Device, Index >::spmvCuda( const InVector& inVector,
     __syncthreads();
     if( warpStart + inWarpIdx >= this->getRows() )
         return;
-    
+
     outVector[ warpStart + inWarpIdx ] = results[ this->rowPermArray[ warpStart + inWarpIdx ] & ( cudaBlockSize - 1 ) ];
 }
 #endif
@@ -1321,7 +1331,7 @@ public:
                     const Index begin = matrix.groupPointers.getElement( groupBegin ) * matrix.warpSize + rowStripPerm * stripLength;
                     Index elementPtr = begin;
                     Index rowLength = 0;
-                    
+
                     for( Index group = 0; group < matrix.getNumberOfGroups( row ); group++ )
                     {
                         for( Index i = 0; i < matrix.getGroupLength( strip, group ); i++ )
diff --git a/src/TNL/Matrices/Legacy/CSR.h b/src/TNL/Matrices/Legacy/CSR.h
index b68434252..a31f3ee76 100644
--- a/src/TNL/Matrices/Legacy/CSR.h
+++ b/src/TNL/Matrices/Legacy/CSR.h
@@ -8,7 +8,7 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#pragma once 
+#pragma once
 
 #include <TNL/Matrices/Legacy/Sparse.h>
 #include <TNL/Containers/Vector.h>
@@ -18,7 +18,7 @@
 
 namespace TNL {
 namespace Matrices {
-   
+
 #ifdef HAVE_UMFPACK
     template< typename Matrix, typename Preconditioner >
     class UmfpackWrapper;
@@ -48,6 +48,7 @@ public:
    using DeviceType = Device;
    using IndexType = Index;
    typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector;
+   typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
    typedef Sparse< Real, Device, Index > BaseType;
    using MatrixRow = typename BaseType::MatrixRow;
@@ -71,13 +72,15 @@ public:
 
    void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
+   void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
+
    IndexType getRowLength( const IndexType row ) const;
 
    __cuda_callable__
    IndexType getRowLengthFast( const IndexType row ) const;
 
    IndexType getNonZeroRowLength( const IndexType row ) const;
-   
+
    __cuda_callable__
    IndexType getNonZeroRowLengthFast( const IndexType row ) const;
 
@@ -264,7 +267,7 @@ protected:
    int cudaWarpSize, hybridModeSplit;
 
    typedef CSRDeviceDependentCode< DeviceType > DeviceDependentCode;
-   
+
    friend class CSRDeviceDependentCode< DeviceType >;
    friend class CusparseCSR< RealType >;
 };
diff --git a/src/TNL/Matrices/Legacy/CSR_impl.h b/src/TNL/Matrices/Legacy/CSR_impl.h
index 08b35f563..5fec923f0 100644
--- a/src/TNL/Matrices/Legacy/CSR_impl.h
+++ b/src/TNL/Matrices/Legacy/CSR_impl.h
@@ -20,7 +20,7 @@
 #endif
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 #ifdef HAVE_CUSPARSE
 template< typename Real, typename Index >
@@ -99,6 +99,16 @@ void CSR< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLeng
    this->columnIndexes.setValue( this->columns );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index >
+void CSR< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
+{
+   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
+   for( IndexType row = 0; row < this->getRows(); row++ )
+      rowLengths.setElement( row, this->getRowLength( row ) );
+}
+
 template< typename Real,
           typename Device,
           typename Index >
@@ -131,7 +141,7 @@ template< typename Real,
           typename Index >
 __cuda_callable__
 Index CSR< Real, Device, Index >::getNonZeroRowLengthFast( const IndexType row ) const
-{  
+{
    ConstMatrixRow matrixRow = this->getRow( row );
    return matrixRow.getNonZeroElementsCount();
 }
@@ -884,10 +894,10 @@ template<>
 class tnlCusparseCSRWrapper< float, int >
 {
    public:
- 
+
       typedef float Real;
       typedef int Index;
- 
+
       static void vectorProduct( const Index rows,
                                  const Index columns,
                                  const Index nnz,
@@ -924,10 +934,10 @@ template<>
 class tnlCusparseCSRWrapper< double, int >
 {
    public:
- 
+
       typedef double Real;
       typedef int Index;
- 
+
       static void vectorProduct( const Index rows,
                                  const Index columns,
                                  const Index nnz,
diff --git a/src/TNL/Matrices/Legacy/Ellpack.h b/src/TNL/Matrices/Legacy/Ellpack.h
index 5f6e666f9..eea58b757 100644
--- a/src/TNL/Matrices/Legacy/Ellpack.h
+++ b/src/TNL/Matrices/Legacy/Ellpack.h
@@ -14,7 +14,7 @@
 #include <TNL/Containers/Vector.h>
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 template< typename Device >
 class EllpackDeviceDependentCode;
@@ -37,6 +37,7 @@ public:
    typedef Index IndexType;
    typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
+   typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector;
    typedef Sparse< Real, Device, Index > BaseType;
@@ -59,20 +60,22 @@ public:
 
    void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
+   void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
+
    void setConstantCompressedRowLengths( const IndexType& rowLengths );
 
    IndexType getRowLength( const IndexType row ) const;
 
    __cuda_callable__
    IndexType getRowLengthFast( const IndexType row ) const;
-   
+
    IndexType getNonZeroRowLength( const IndexType row ) const;
 
    template< typename Real2, typename Device2, typename Index2 >
    void setLike( const Ellpack< Real2, Device2, Index2 >& matrix );
 
    void reset();
- 
+
    template< typename Real2, typename Device2, typename Index2 >
    bool operator == ( const Ellpack< Real2, Device2, Index2 >& matrix ) const;
 
@@ -175,9 +178,9 @@ public:
 								const Vector& old_x,
 								Vector& x,
 								const RealType& omega ) const;
-   
+
    // copy assignment
-   Ellpack& operator=( const Ellpack& matrix );   
+   Ellpack& operator=( const Ellpack& matrix );
 
    // cross-device copy assignment
    template< typename Real2, typename Device2, typename Index2,
diff --git a/src/TNL/Matrices/Legacy/Ellpack_impl.h b/src/TNL/Matrices/Legacy/Ellpack_impl.h
index 656c3f7c2..04ca10385 100644
--- a/src/TNL/Matrices/Legacy/Ellpack_impl.h
+++ b/src/TNL/Matrices/Legacy/Ellpack_impl.h
@@ -33,7 +33,7 @@ String Ellpack< Real, Device, Index >::getSerializationType()
 {
    return String( "Matrices::Ellpack< " ) +
           String( TNL::getType< Real >() ) +
-          ", [any device], " + 
+          ", [any device], " +
           getType< Index >() +
           String( " >" );
 }
@@ -66,7 +66,7 @@ void Ellpack< Real, Device, Index >::setDimensions( const IndexType rows,
            IndexType missingRows = this->rows - this->alignedRows;
 
            missingRows = roundToMultiple( missingRows, Cuda::getWarpSize() );
-           
+
            this->alignedRows +=  missingRows;
        }
    }
@@ -86,10 +86,20 @@ void Ellpack< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRow
    TNL_ASSERT_EQ( this->getRows(), rowLengths.getSize(), "wrong size of the rowLengths vector" );
 
    this->rowLengths = this->maxRowLength = max( rowLengths );
-   
+
    allocateElements();
 }
 
+template< typename Real,
+          typename Device,
+          typename Index >
+void Ellpack< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
+{
+   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
+   for( IndexType row = 0; row < this->getRows(); row++ )
+      rowLengths.setElement( row, this->getRowLength( row ) );
+}
+
 template< typename Real,
           typename Device,
           typename Index >
@@ -769,13 +779,13 @@ template< typename Real,
 void Ellpack< Real, Device, Index >::allocateElements()
 {
    IndexType numMtxElmnts = this->alignedRows * this->rowLengths;
-   
+
    if( this->alignedRows != 0 )
    {
-       TNL_ASSERT_EQ( numMtxElmnts / this->alignedRows, this->rowLengths, 
+       TNL_ASSERT_EQ( numMtxElmnts / this->alignedRows, this->rowLengths,
                       "Ellpack cannot store this matrix. The number of matrix elements has overflown the value that IndexType is capable of storing" );
    }
-   
+
    Sparse< Real, Device, Index >::allocateMatrixElements( this->alignedRows * this->rowLengths );
 }
 
diff --git a/src/TNL/Matrices/Legacy/SlicedEllpack.h b/src/TNL/Matrices/Legacy/SlicedEllpack.h
index b79913b23..63b433087 100644
--- a/src/TNL/Matrices/Legacy/SlicedEllpack.h
+++ b/src/TNL/Matrices/Legacy/SlicedEllpack.h
@@ -66,6 +66,7 @@ public:
    typedef Index IndexType;
    typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
+   typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector;
    typedef Sparse< Real, Device, Index > BaseType;
@@ -89,6 +90,8 @@ public:
 
    void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
+   void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
+
    IndexType getRowLength( const IndexType row ) const;
 
    __cuda_callable__
diff --git a/src/TNL/Matrices/Legacy/SlicedEllpack_impl.h b/src/TNL/Matrices/Legacy/SlicedEllpack_impl.h
index bfba092ff..8673a02c5 100644
--- a/src/TNL/Matrices/Legacy/SlicedEllpack_impl.h
+++ b/src/TNL/Matrices/Legacy/SlicedEllpack_impl.h
@@ -16,7 +16,7 @@
 #include <TNL/Exceptions/NotImplementedError.h>
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 template< typename Real,
           typename Device,
@@ -83,6 +83,17 @@ void SlicedEllpack< Real, Device, Index, SliceSize >::setCompressedRowLengths( C
    this->allocateMatrixElements( this->slicePointers.getElement( slices ) );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          int SliceSize >
+void SlicedEllpack< Real, Device, Index, SliceSize >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
+{
+   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
+   for( IndexType row = 0; row < this->getRows(); row++ )
+      rowLengths.setElement( row, this->getRowLength( row ) );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index 3c0fd8a9b..4fce8358d 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -47,26 +47,15 @@ public:
            const IndexType columns,
            const RealAllocatorType& allocator = RealAllocatorType() );
 
-   virtual void setDimensions( const IndexType rows,
-                               const IndexType columns );
-
-   virtual void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) = 0;
-
-   [[deprecated]]
-   virtual IndexType getRowLength( const IndexType row ) const = 0;
-
-   // TODO: implementation is not parallel
-   // TODO: it would be nice if padding zeros could be stripped
-   //void getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const;
-
-   virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
+   void setDimensions( const IndexType rows,
+                       const IndexType columns );
 
    template< typename Matrix_ >
    void setLike( const Matrix_& matrix );
 
    IndexType getAllocatedElementsCount() const;
 
-   virtual IndexType getNumberOfNonzeroMatrixElements() const = 0;
+   IndexType getNumberOfNonzeroMatrixElements() const;
 
    void reset();
 
@@ -76,20 +65,6 @@ public:
    __cuda_callable__
    IndexType getColumns() const;
 
-   //virtual TODO: uncomment
-   void setElement( const IndexType row,
-                            const IndexType column,
-                            const RealType& value );// = 0;
-
-   //virtual TODO: uncomment
-   void addElement( const IndexType row,
-                            const IndexType column,
-                            const RealType& value,
-                            const RealType& thisElementMultiplicator = 1.0 );// = 0;
-
-   virtual Real getElement( const IndexType row,
-                            const IndexType column ) const = 0;
-
    const ValuesVectorType& getValues() const;
 
    ValuesVectorType& getValues();
diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp
index 0236f94f7..84dc6ef47 100644
--- a/src/TNL/Matrices/Matrix.hpp
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -56,27 +56,6 @@ void Matrix< Real, Device, Index, RealAllocator >::setDimensions( const IndexTyp
    this->columns = columns;
 }
 
-/*template< typename Real,
-          typename Device,
-          typename Index,
-          typename RealAllocator >
-void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const
-{
-   rowLengths.setSize( this->getRows() );
-   getCompressedRowLengths( rowLengths.getView() );
-}*/
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename RealAllocator >
-void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
-{
-   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
-   for( IndexType row = 0; row < this->getRows(); row++ )
-      rowLengths.setElement( row, this->getRowLength( row ) );
-}
-
 template< typename Real,
           typename Device,
           typename Index,
-- 
GitLab


From 8d13b8225bccc41087257f961e8f516bfbfaed75 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 3 Mar 2020 13:35:37 +0100
Subject: [PATCH 175/179] Matrix methods for coloring marked as deprecated,
 method Matrix::help was erased.

---
 src/TNL/Matrices/Matrix.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index 4fce8358d..48e6ebf7c 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -84,16 +84,14 @@ public:
 
 
    // TODO: method for symmetric matrices, should not be in general Matrix interface
+   [[deprecated]]
    __cuda_callable__
    const IndexType& getNumberOfColors() const;
 
    // TODO: method for symmetric matrices, should not be in general Matrix interface
+   [[deprecated]]
    void computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector);
 
-   // TODO: what is this supposed to do?!?  There are redefinitions only in the
-   // EllpackSymmetricGraph and SlicedEllpackSymmetricGraph classes...
-   bool help( bool verbose = false ) { return true;};
-
    // TODO: copy should be done in the operator= and it should work the other way too
    void copyFromHostToCuda( Matrices::Matrix< Real, Devices::Host, Index >& matrix );
 
@@ -103,7 +101,10 @@ public:
 
    protected:
 
-   IndexType rows, columns, numberOfColors;
+   IndexType rows, columns;
+
+   // TODO: remove1
+   IndexType numberOfColors;
 
    ValuesVectorType values;
 };
-- 
GitLab


From 5777de50e8e7bedddcc688a7a88e9f333cdfeef6 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 3 Mar 2020 16:37:46 +0100
Subject: [PATCH 176/179] Deleted unused method Matrix::getValuesSize().

---
 src/TNL/Matrices/Matrix.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index 48e6ebf7c..8e467b004 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -96,8 +96,8 @@ public:
    void copyFromHostToCuda( Matrices::Matrix< Real, Devices::Host, Index >& matrix );
 
    // TODO: missing implementation!
-   __cuda_callable__
-   Index getValuesSize() const;
+   //__cuda_callable__
+   //Index getValuesSize() const;
 
    protected:
 
-- 
GitLab


From 5b34b208296251963c1f3d010c6e3b69b4ade2f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Tue, 3 Mar 2020 17:20:28 +0100
Subject: [PATCH 177/179] Deleted useless methods in Matrix and MatrixView.

---
 src/TNL/Matrices/Matrix.h       |  9 +---
 src/TNL/Matrices/Matrix.hpp     | 15 -------
 src/TNL/Matrices/MatrixView.h   | 35 ----------------
 src/TNL/Matrices/MatrixView.hpp | 74 ---------------------------------
 4 files changed, 1 insertion(+), 132 deletions(-)

diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index 8e467b004..9ce1a109a 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -92,18 +92,11 @@ public:
    [[deprecated]]
    void computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector);
 
-   // TODO: copy should be done in the operator= and it should work the other way too
-   void copyFromHostToCuda( Matrices::Matrix< Real, Devices::Host, Index >& matrix );
-
-   // TODO: missing implementation!
-   //__cuda_callable__
-   //Index getValuesSize() const;
-
    protected:
 
    IndexType rows, columns;
 
-   // TODO: remove1
+   // TODO: remove
    IndexType numberOfColors;
 
    ValuesVectorType values;
diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp
index 84dc6ef47..b7e000670 100644
--- a/src/TNL/Matrices/Matrix.hpp
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -250,21 +250,6 @@ computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector)
     }
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename RealAllocator >
-void
-Matrix< Real, Device, Index, RealAllocator >::
-copyFromHostToCuda( Matrix< Real, Devices::Host, Index >& matrix )
-{
-    this->numberOfColors = matrix.getNumberOfColors();
-    this->columns = matrix.getColumns();
-    this->rows = matrix.getRows();
-
-    this->values.setSize( matrix.getValuesSize() );
-}
-
 #ifdef HAVE_CUDA
 template< typename Matrix,
           typename InVector,
diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h
index cd1beda9c..d52b9a24a 100644
--- a/src/TNL/Matrices/MatrixView.h
+++ b/src/TNL/Matrices/MatrixView.h
@@ -49,14 +49,6 @@ public:
    __cuda_callable__
    MatrixView( const MatrixView& view ) = default;
 
-   virtual IndexType getRowLength( const IndexType row ) const = 0;
-
-   // TODO: implementation is not parallel
-   // TODO: it would be nice if padding zeros could be stripped
-   void getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const;
-
-   virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
-
    IndexType getAllocatedElementsCount() const;
 
    virtual IndexType getNumberOfNonzeroMatrixElements() const;
@@ -67,24 +59,6 @@ public:
    __cuda_callable__
    IndexType getColumns() const;
 
-   /****
-    * TODO: The fast variants of the following methods cannot be virtual.
-    * If they were, they could not be used in the CUDA kernels. If CUDA allows it
-    * in the future and it does not slow down, declare them as virtual here.
-    */
-
-   virtual void setElement( const IndexType row,
-                            const IndexType column,
-                            const RealType& value ) = 0;
-
-   virtual void addElement( const IndexType row,
-                            const IndexType column,
-                            const RealType& value,
-                            const RealType& thisElementMultiplicator = 1.0 ) = 0;
-
-   virtual Real getElement( const IndexType row,
-                            const IndexType column ) const = 0;
-
    __cuda_callable__
    const ValuesView& getValues() const;
 
@@ -135,15 +109,6 @@ std::ostream& operator << ( std::ostream& str, const MatrixView< Real, Device, I
    return str;
 }
 
-/*
-template< typename Matrix,
-          typename InVector,
-          typename OutVector >
-void MatrixVectorProductCuda( const Matrix& matrix,
-                              const InVector& inVector,
-                              OutVector& outVector );
-*/
-
 } // namespace Matrices
 } // namespace TNL
 
diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp
index 9fd73e519..dfac8f3af 100644
--- a/src/TNL/Matrices/MatrixView.hpp
+++ b/src/TNL/Matrices/MatrixView.hpp
@@ -42,29 +42,6 @@ MatrixView( const IndexType rows_,
 {
 }
 
-template< typename Real,
-          typename Device,
-          typename Index >
-void
-MatrixView< Real, Device, Index >::
-getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const
-{
-   rowLengths.setSize( this->getRows() );
-   getCompressedRowLengths( rowLengths.getView() );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void
-MatrixView< Real, Device, Index >::
-getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
-{
-   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
-   for( IndexType row = 0; row < this->getRows(); row++ )
-      rowLengths.setElement( row, this->getRowLength( row ) );
-}
-
 template< typename Real,
           typename Device,
           typename Index >
@@ -244,56 +221,5 @@ computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector)
     }
 }
 
-/*
-#ifdef HAVE_CUDA
-template< typename Matrix,
-          typename InVector,
-          typename OutVector >
-__global__ void MatrixVectorProductCudaKernel( const Matrix* matrix,
-                                               const InVector* inVector,
-                                               OutVector* outVector,
-                                               int gridIdx )
-{
-   static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" );
-   const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-   if( rowIdx < matrix->getRows() )
-      ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector );
-}
-#endif
-
-template< typename Matrix,
-          typename InVector,
-          typename OutVector >
-void MatrixVectorProductCuda( const Matrix& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-{
-#ifdef HAVE_CUDA
-   typedef typename Matrix::IndexType IndexType;
-   Matrix* kernel_this = Cuda::passToDevice( matrix );
-   InVector* kernel_inVector = Cuda::passToDevice( inVector );
-   OutVector* kernel_outVector = Cuda::passToDevice( outVector );
-   dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
-   const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
-   const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
-   for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
-   {
-      if( gridIdx == cudaGrids - 1 )
-         cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
-      MatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>>
-                                     ( kernel_this,
-                                       kernel_inVector,
-                                       kernel_outVector,
-                                       gridIdx );
-      TNL_CHECK_CUDA_DEVICE;
-   }
-   Cuda::freeFromDevice( kernel_this );
-   Cuda::freeFromDevice( kernel_inVector );
-   Cuda::freeFromDevice( kernel_outVector );
-   TNL_CHECK_CUDA_DEVICE;
-#endif
-}
-*/
-
 } // namespace Matrices
 } // namespace TNL
-- 
GitLab


From aeb3be4c38d021feeec5e7eca9e9a20e6c456a42 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Tue, 3 Mar 2020 17:21:12 +0100
Subject: [PATCH 178/179] Marked methods related to matrix coloring as
 deprecated. They will be moved to some other place.

---
 src/TNL/Matrices/MatrixView.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h
index d52b9a24a..895510181 100644
--- a/src/TNL/Matrices/MatrixView.h
+++ b/src/TNL/Matrices/MatrixView.h
@@ -89,10 +89,12 @@ public:
 
 
    // TODO: method for symmetric matrices, should not be in general Matrix interface
+   [[deprecated]]
    __cuda_callable__
    const IndexType& getNumberOfColors() const;
 
    // TODO: method for symmetric matrices, should not be in general Matrix interface
+   [[deprecated]]
    void computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector);
 
    protected:
-- 
GitLab


From 456375d0c8dd4caeb1d64776e81b45f0c48880e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Tue, 3 Mar 2020 17:41:33 +0100
Subject: [PATCH 179/179] Deleted unused method
 Matrix::MatrixVectorProductCuda.

---
 src/TNL/Matrices/Matrix.h   |  7 ------
 src/TNL/Matrices/Matrix.hpp | 49 -------------------------------------
 2 files changed, 56 deletions(-)

diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index 9ce1a109a..129a54cbe 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -109,13 +109,6 @@ std::ostream& operator << ( std::ostream& str, const Matrix< Real, Device, Index
    return str;
 }
 
-template< typename Matrix,
-          typename InVector,
-          typename OutVector >
-void MatrixVectorProductCuda( const Matrix& matrix,
-                              const InVector& inVector,
-                              OutVector& outVector );
-
 } // namespace Matrices
 } // namespace TNL
 
diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp
index b7e000670..ce5f52274 100644
--- a/src/TNL/Matrices/Matrix.hpp
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -250,54 +250,5 @@ computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector)
     }
 }
 
-#ifdef HAVE_CUDA
-template< typename Matrix,
-          typename InVector,
-          typename OutVector >
-__global__ void MatrixVectorProductCudaKernel( const Matrix* matrix,
-                                                  const InVector* inVector,
-                                                  OutVector* outVector,
-                                                  int gridIdx )
-{
-   static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" );
-   const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-   if( rowIdx < matrix->getRows() )
-      ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector );
-}
-#endif
-
-template< typename Matrix,
-          typename InVector,
-          typename OutVector >
-void MatrixVectorProductCuda( const Matrix& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-{
-#ifdef HAVE_CUDA
-   typedef typename Matrix::IndexType IndexType;
-   Matrix* kernel_this = Cuda::passToDevice( matrix );
-   InVector* kernel_inVector = Cuda::passToDevice( inVector );
-   OutVector* kernel_outVector = Cuda::passToDevice( outVector );
-   dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
-   const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
-   const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
-   for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
-   {
-      if( gridIdx == cudaGrids - 1 )
-         cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
-      MatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>>
-                                     ( kernel_this,
-                                       kernel_inVector,
-                                       kernel_outVector,
-                                       gridIdx );
-      TNL_CHECK_CUDA_DEVICE;
-   }
-   Cuda::freeFromDevice( kernel_this );
-   Cuda::freeFromDevice( kernel_inVector );
-   Cuda::freeFromDevice( kernel_outVector );
-   TNL_CHECK_CUDA_DEVICE;
-#endif
-}
-
 } // namespace Matrices
 } // namespace TNL
-- 
GitLab