From 16f3e12af5a32bd121e86d13af5bb59c3f5bb0b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sun, 5 Jan 2020 20:57:18 +0100
Subject: [PATCH] Fixing DenseMatrix unit tests.

---
 src/TNL/Containers/Segments/Ellpack.hpp  |  10 +-
 src/TNL/Matrices/Dense.h                 |   4 +-
 src/TNL/Matrices/Dense.hpp               |  72 ++-----
 src/TNL/Matrices/DenseMatrixView.h       |  18 +-
 src/TNL/Matrices/DenseMatrixView.hpp     | 162 ++++++---------
 src/TNL/Matrices/details/DenseMatrix.h   |  67 +++++++
 src/UnitTests/Matrices/DenseMatrixTest.h | 240 ++++++++++-------------
 7 files changed, 270 insertions(+), 303 deletions(-)
 create mode 100644 src/TNL/Matrices/details/DenseMatrix.h

diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index 97a256c9e2..8763c2e5dc 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -306,7 +306,7 @@ void
 Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    if( RowMajorOrder )
    {
       const IndexType segmentSize = this->segmentSize;
@@ -315,8 +315,8 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType end = begin + segmentSize;
          RealType aux( zero );
          bool compute( true );
-         for( IndexType j = begin; j < end && compute; j++  )
-            reduction( aux, fetch( i, j, compute, args... ) );
+         for( IndexType j = begin, localIdx = 0; j < end && compute; j++, localIdx++  )
+            reduction( aux, fetch( i, localIdx, j, compute, args... ) );
          keeper( i, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -330,8 +330,8 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType end = storageSize;
          RealType aux( zero );
          bool compute( true );
-         for( IndexType j = begin; j < end && compute; j += alignedSize  )
-            reduction( aux, fetch( i, j, compute, args... ) );
+         for( IndexType j = begin, localIdx = 0; j < end && compute; j += alignedSize, localIdx++  )
+            reduction( aux, fetch( i, localIdx, j, compute, args... ) );
          keeper( i, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index 18249a7b16..90aa57170b 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -48,8 +48,8 @@ class Dense : public Matrix< Real, Device, Index >
       using ValuesViewType = typename ValuesType::ViewType;
       using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >;
       using SegmentViewType = typename SegmentsType::SegmentViewType;
-      using ViewType = DenseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >;
-      using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
+      using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >;
+      using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
       using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >;
 
       // TODO: remove this
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 85f1b560d0..fe11d67593 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -41,10 +41,9 @@ template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder,
-          template< typename, typename, typename > class Segments,
           typename RealAllocator >
 auto
-Dense< Real, Device, Index, RowMajorOrder, Segments, RealAllocator >::
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getView() -> ViewType
 {
    return ViewType( this->getRows(), 
@@ -57,10 +56,9 @@ template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder,
-          template< typename, typename, typename > class Segments,
           typename RealAllocator >
 auto
-Dense< Real, Device, Index, RowMajorOrder, Segments, RealAllocator >::
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getConstView() const -> ConstViewType
 {
    return ConstViewType( this->getRows(),
@@ -451,8 +449,9 @@ template< typename Real,
           typename RealAllocator >
    template< typename InVector,
              typename OutVector >
-void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct( const InVector& inVector,
-                                                           OutVector& outVector ) const
+void 
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+vectorProduct( const InVector& inVector, OutVector& outVector ) const
 {
    TNL_ASSERT( this->getColumns() == inVector.getSize(),
             std::cerr << "Matrix columns: " << this->getColumns() << std::endl
@@ -461,7 +460,20 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct(
                std::cerr << "Matrix rows: " << this->getRows() << std::endl
                     << "Vector size: " << outVector.getSize() << std::endl );
 
-   DeviceDependentCode::vectorProduct( *this, inVector, outVector );
+   //DeviceDependentCode::vectorProduct( *this, inVector, outVector );
+   const auto inVectorView = inVector.getConstView();
+   auto outVectorView = outVector.getView();
+   const auto valuesView = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType {
+      return valuesView[ offset ] * inVectorView[ column ];
+   };
+   auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+      sum += value;
+   };
+   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      outVectorView[ row ] = value;
+   };
+   this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
 }
 
 template< typename Real,
@@ -1051,51 +1063,5 @@ Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementInde
    return this->segments.getGlobalIndex( row, column );
 }
 
-template<>
-class DenseDeviceDependentCode< Devices::Host >
-{
-   public:
-
-      typedef Devices::Host Device;
-
-      template< typename Real,
-                typename Index,
-                bool RowMajorOrder,
-                typename RealAllocator,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-#ifdef HAVE_OPENMP
-#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
-#endif
-         for( Index row = 0; row < matrix.getRows(); row ++ )
-            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
-      }
-};
-
-template<>
-class DenseDeviceDependentCode< Devices::Cuda >
-{
-   public:
-
-      typedef Devices::Cuda Device;
-
-      template< typename Real,
-                typename Index,
-                bool RowMajorOrder,
-                typename RealAllocator,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-         MatrixVectorProductCuda( matrix, inVector, outVector );
-      }
-};
-
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h
index 2334eb6364..23f5d73178 100644
--- a/src/TNL/Matrices/DenseMatrixView.h
+++ b/src/TNL/Matrices/DenseMatrixView.h
@@ -14,14 +14,11 @@
 #include <TNL/Devices/Host.h>
 #include <TNL/Matrices/DenseMatrixRowView.h>
 #include <TNL/Matrices/MatrixView.h>
-#include <TNL/Containers/Segments/EllpackView.h>
+#include <TNL/Containers/Segments/Ellpack.h>
 
 namespace TNL {
 namespace Matrices {
 
-//template< typename Device >
-//class DenseDeviceDependentCode;
-
 template< typename Real = double,
           typename Device = Devices::Host,
           typename Index = int,
@@ -48,6 +45,9 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
       using SegmentsViewType = typename SegmentsType::ViewType;
       using SegmentViewType = typename SegmentsType::SegmentViewType;
       using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >;
+      using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >;
+      using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
+
 
       // TODO: remove this
       using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector;
@@ -56,7 +56,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
       template< typename _Real = Real,
                 typename _Device = Device,
                 typename _Index = Index >
-      using Self = Dense< _Real, _Device, _Index >;
+      using Self = DenseMatrixView< _Real, _Device, _Index >;
 
       __cuda_callable__
       DenseMatrixView();
@@ -172,12 +172,12 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
                                 const RealType& omega = 1.0 ) const;
 
       // copy assignment
-      Dense& operator=( const Dense& matrix );
+      DenseMatrixView& operator=( const DenseMatrixView& matrix );
 
       // cross-device copy assignment
       template< typename Real2, typename Device2, typename Index2,
                 typename = typename Enabler< Device2 >::type >
-      Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix );
+      DenseMatrixView& operator=( const DenseMatrixView< Real2, Device2, Index2 >& matrix );
 
       void save( const String& fileName ) const;
 
@@ -195,8 +195,8 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
       IndexType getElementIndex( const IndexType row,
                                  const IndexType column ) const;
 
-      typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode;
-      friend class DenseDeviceDependentCode< DeviceType >;
+      //typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode;
+      //friend class DenseDeviceDependentCode< DeviceType >;
 
       SegmentsViewType segments;
 };
diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp
index 18d6574ac3..08cfab8437 100644
--- a/src/TNL/Matrices/DenseMatrixView.hpp
+++ b/src/TNL/Matrices/DenseMatrixView.hpp
@@ -20,10 +20,9 @@ namespace Matrices {
 template< typename Real,
           typename Device,
           typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
+          bool RowMajorOrder >
 __cuda_callable__
-DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 DenseMatrixView()
 {
 }
@@ -31,27 +30,24 @@ DenseMatrixView()
 template< typename Real,
           typename Device,
           typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
+          bool RowMajorOrder >
 __cuda_callable__
-DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 DenseMatrixView( const IndexType rows,
-                  const IndexType columns,
-                  const ValuesViewType& values,
-                  const ColumnsIndexesViewType& columnIndexes,
-                  const SegmentsViewType& segments )
- : MatrixView< Real, Device, Index >( rows, columns, values ), columnIndexes( columnIndexes ), segments( segments )
+                 const IndexType columns,
+                 const ValuesViewType& values,
+                 const SegmentsViewType& segments )
+ : MatrixView< Real, Device, Index >( rows, columns, values ), segments( segments )
 {
 }
 
 template< typename Real,
           typename Device,
           typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
+          bool RowMajorOrder >
 __cuda_callable__
 auto
-DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 getView() -> ViewType
 {
    return ViewType( this->getRows(), 
@@ -64,11 +60,10 @@ getView() -> ViewType
 template< typename Real,
           typename Device,
           typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
+          bool RowMajorOrder >
 __cuda_callable__
 auto
-DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 getConstView() const -> ConstViewType
 {
    return ConstViewType( this->getRows(),
@@ -95,8 +90,7 @@ getSerializationType()
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 String
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 getSerializationTypeVirtual() const
@@ -107,8 +101,7 @@ getSerializationTypeVirtual() const
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Vector >
 void
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
@@ -132,8 +125,7 @@ getCompressedRowLengths( Vector& rowLengths ) const
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getRowLength( const IndexType row ) const
 {
    return this->getColumns();
@@ -142,8 +134,7 @@ Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getRowLength( const
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMaxRowLength() const
 {
    return this->getColumns();
@@ -152,8 +143,7 @@ Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMaxRowLength() c
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfMatrixElements() const
 {
    return this->getRows() * this->getColumns();
@@ -162,8 +152,7 @@ Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfMatrixEl
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfNonzeroMatrixElements() const
 {
    const auto values_view = this->values.getConstView();
@@ -176,8 +165,7 @@ Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfNonzeroM
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::reset()
 {
    Matrix< Real, Device, Index >::reset();
@@ -186,8 +174,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::reset()
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::setValue( const Real& value )
 {
    this->values = value;
@@ -196,8 +183,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::setValue( const Real
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 __cuda_callable__ auto
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 getRow( const IndexType& rowIdx ) const -> const RowView
@@ -209,8 +195,7 @@ getRow( const IndexType& rowIdx ) const -> const RowView
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 __cuda_callable__ auto
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 getRow( const IndexType& rowIdx ) -> RowView
@@ -222,8 +207,7 @@ getRow( const IndexType& rowIdx ) -> RowView
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 __cuda_callable__
 Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const IndexType row,
                                                 const IndexType column )
@@ -239,8 +223,7 @@ Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const I
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 __cuda_callable__
 const Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const IndexType row,
                                                       const IndexType column ) const
@@ -256,8 +239,7 @@ const Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( c
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::setElement( const IndexType row,
                                                const IndexType column,
                                                const RealType& value )
@@ -269,8 +251,7 @@ bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::setElement( const In
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::addElement( const IndexType row,
                                                         const IndexType column,
                                                         const RealType& value,
@@ -289,8 +270,7 @@ bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::addElement( const In
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 Real 
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 getElement( const IndexType row,
@@ -302,8 +282,7 @@ getElement( const IndexType row,
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
 void
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
@@ -320,8 +299,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
@@ -333,8 +311,7 @@ allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zer
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Function >
 void
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
@@ -352,8 +329,7 @@ forRows( IndexType first, IndexType last, Function& function ) const
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Function >
 void
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
@@ -371,8 +347,7 @@ forRows( IndexType first, IndexType last, Function& function )
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Function >
 void
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
@@ -384,8 +359,7 @@ forAllRows( Function& function ) const
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Function >
 void
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
@@ -397,8 +371,7 @@ forAllRows( Function& function )
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Vector >
 __cuda_callable__
 typename Vector::RealType DenseMatrixView< Real, Device, Index, RowMajorOrder >::rowVectorProduct( const IndexType row,
@@ -414,8 +387,7 @@ typename Vector::RealType DenseMatrixView< Real, Device, Index, RowMajorOrder >:
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename InVector,
              typename OutVector >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::vectorProduct( const InVector& inVector,
@@ -428,14 +400,13 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::vectorProduct( const
                std::cerr << "Matrix rows: " << this->getRows() << std::endl
                     << "Vector size: " << outVector.getSize() << std::endl );
 
-   DeviceDependentCode::vectorProduct( *this, inVector, outVector );
+   //DeviceDependentCode::vectorProduct( *this, inVector, outVector );
 }
 
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Matrix >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::addMatrix( const Matrix& matrix,
                                               const RealType& matrixMultiplicator,
@@ -454,7 +425,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::addMatrix( const Mat
       this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values;
 }
 
-#ifdef HAVE_CUDA
+#ifdef HAVE_CUDA_______________
 template< typename Real,
           typename Index,
           bool RowMajorOrder,
@@ -463,7 +434,7 @@ template< typename Real,
           typename Matrix2,
           int tileDim,
           int tileRowBlockSize >
-__global__ void DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix,
+__global__ void DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index, RowMajorOrder >* resultMatrix,
                                                    const Matrix1* matrixA,
                                                    const Matrix2* matrixB,
                                                    const Real matrixAMultiplicator,
@@ -558,8 +529,7 @@ __global__ void DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index >* r
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Matrix1, typename Matrix2, int tileDim >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMatrixProduct( const Matrix1& matrix1,
                                                               const Matrix2& matrix2,
@@ -599,7 +569,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMatrixProduct( co
    if( std::is_same< Device, Devices::Cuda >::value )
    {
 #ifdef HAVE_CUDA
-      dim3 cudaBlockSize( 0 ), cudaGridSize( 0 );
+      /*dim3 cudaBlockSize( 0 ), cudaGridSize( 0 );
       const IndexType matrixProductCudaBlockSize( 256 );
       const IndexType rowTiles = roundUpDivision( this->getRows(), tileDim );
       const IndexType columnTiles = roundUpDivision( this->getColumns(), tileDim );
@@ -640,12 +610,12 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMatrixProduct( co
             Cuda::freeFromDevice( this_kernel );
             Cuda::freeFromDevice( matrix1_kernel );
             Cuda::freeFromDevice( matrix2_kernel );
-         }
+         }*/
 #endif
    }
 }
 
-#ifdef HAVE_CUDA
+#ifdef HAVE_CUDA________________________
 template< typename Real,
           typename Index,
           typename Matrix,
@@ -802,8 +772,7 @@ __global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda,
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Matrix, int tileDim >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( const Matrix& matrix,
                                                               const RealType& matrixMultiplicator )
@@ -828,7 +797,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( co
    if( std::is_same< Device, Devices::Cuda >::value )
    {
 #ifdef HAVE_CUDA
-      dim3 cudaBlockSize( 0 ), cudaGridSize( 0 );
+      /*dim3 cudaBlockSize( 0 ), cudaGridSize( 0 );
       const IndexType matrixProductCudaBlockSize( 256 );
       const IndexType rowTiles = roundUpDivision( this->getRows(), tileDim );
       const IndexType columnTiles = roundUpDivision( this->getColumns(), tileDim );
@@ -887,7 +856,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( co
             TNL_CHECK_CUDA_DEVICE;
          }
       Cuda::freeFromDevice( this_device );
-      Cuda::freeFromDevice( matrix_device );
+      Cuda::freeFromDevice( matrix_device );*/
 #endif
    }
 }
@@ -895,8 +864,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( co
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Vector1, typename Vector2 >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::performSORIteration( const Vector1& b,
                                                         const IndexType row,
@@ -919,10 +887,9 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::performSORIteration(
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 DenseMatrixView< Real, Device, Index, RowMajorOrder >&
-DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const Dense& matrix )
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const DenseMatrixView& matrix )
 {
    this->setLike( matrix );
    this->values = matrix.values;
@@ -933,11 +900,10 @@ DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const Dense& m
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
    template< typename Real2, typename Device2, typename Index2, typename >
 DenseMatrixView< Real, Device, Index, RowMajorOrder >&
-DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const Dense< Real2, Device2, Index2 >& matrix )
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const DenseMatrixView< Real2, Device2, Index2 >& matrix )
 {
    static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
                   "unknown device" );
@@ -953,8 +919,7 @@ DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const Dense< R
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( const String& fileName ) const
 {
    Object::save( fileName );
@@ -963,8 +928,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( const String&
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::load( const String& fileName )
 {
    Object::load( fileName );
@@ -973,28 +937,25 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::load( const String&
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const
 {
-   Matrix< Real, Device, Index >::save( file );
+   MatrixView< Real, Device, Index >::save( file );
 }
 
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::load( File& file )
 {
-   Matrix< Real, Device, Index >::load( file );
+   MatrixView< Real, Device, Index >::load( file );
 }
 
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 void DenseMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const
 {
    for( IndexType row = 0; row < this->getRows(); row++ )
@@ -1009,8 +970,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream&
 template< typename Real,
           typename Device,
           typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
+          bool RowMajorOrder >
 __cuda_callable__
 Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getElementIndex( const IndexType row,
                                                               const IndexType column ) const
@@ -1018,7 +978,7 @@ Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getElementIndex( co
    return this->segments.getGlobalIndex( row, column );
 }
 
-template<>
+/*template<>
 class DenseDeviceDependentCode< Devices::Host >
 {
    public:
@@ -1062,7 +1022,7 @@ class DenseDeviceDependentCode< Devices::Cuda >
       {
          MatrixVectorProductCuda( matrix, inVector, outVector );
       }
-};
+};*/
 
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/details/DenseMatrix.h b/src/TNL/Matrices/details/DenseMatrix.h
new file mode 100644
index 0000000000..813e58bc44
--- /dev/null
+++ b/src/TNL/Matrices/details/DenseMatrix.h
@@ -0,0 +1,67 @@
+/***************************************************************************
+                          DenseMatrix.h  -  description
+                             -------------------
+    begin                : Jan 5, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Matrices {
+      namespace details {
+
+template< typename Device >
+class DenseDeviceDependentCode;
+template<>
+class DenseDeviceDependentCode< Devices::Host >
+{
+   public:
+
+      typedef Devices::Host Device;
+
+      template< typename Real,
+                typename Index,
+                bool RowMajorOrder,
+                typename RealAllocator,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+#ifdef HAVE_OPENMP
+#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
+#endif
+         for( Index row = 0; row < matrix.getRows(); row ++ )
+            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
+      }
+};
+
+template<>
+class DenseDeviceDependentCode< Devices::Cuda >
+{
+   public:
+
+      typedef Devices::Cuda Device;
+
+      template< typename Real,
+                typename Index,
+                bool RowMajorOrder,
+                typename RealAllocator,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+         MatrixVectorProductCuda( matrix, inVector, outVector );
+      }
+};
+
+      } //namespace details
+   } //namepsace Matrices
+} //namespace TNL
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index c0f9b92ff0..897861f7fa 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -12,8 +12,6 @@
 #include <TNL/Matrices/Matrix.h>
 #include <TNL/Matrices/Dense.h>
 #include <TNL/Containers/Array.h>
-#include <TNL/Pointers/SharedPointer.h>
-#include <TNL/Pointers/SmartPointersRegister.h>
 
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/VectorView.h>
@@ -576,17 +574,16 @@ void test_SetRow()
    const IndexType rows = 3;
    const IndexType cols = 7;
 
-   TNL::Pointers::SharedPointer< Matrix > m;
-   m->reset();
-   m->setDimensions( rows, cols );
+   Matrix m;
+   m.reset();
+   m.setDimensions( rows, cols );
 
    RealType value = 1;
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < cols; j++ )
-         m->setElement( i, j, value++ );
+         m.setElement( i, j, value++ );
 
-   // TODO: replace this with dense matrix view
-   Matrix* m_ptr = &m.template modifyData< DeviceType >();
+   auto matrix_view = m.getView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
       RealType values[ 3 ][ 5 ] {
          { 11, 11, 11, 11, 11 },
@@ -596,36 +593,35 @@ void test_SetRow()
          { 0, 1, 2, 3, 4 },
          { 0, 1, 2, 3, 4 },
          { 2, 3, 4, 5, 6 } };
-      auto row = m_ptr->getRow( rowIdx );
-      //for( IndexType i = 0; i < 5; i++ )
-      ///   row.setElement( rowIdx, i ); //columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] );
+      auto row = matrix_view.getRow( rowIdx );
+      for( IndexType i = 0; i < 5; i++ )
+        row.setElement( columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] );
    };
-   TNL::Pointers::synchronizeSmartPointersOnDevice< DeviceType >();
    TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f );
 
-   EXPECT_EQ( m->getElement( 0, 0 ), 11 );
-   EXPECT_EQ( m->getElement( 0, 1 ), 11 );
-   EXPECT_EQ( m->getElement( 0, 2 ), 11 );
-   EXPECT_EQ( m->getElement( 0, 3 ), 11 );
-   EXPECT_EQ( m->getElement( 0, 4 ), 11 );
-   EXPECT_EQ( m->getElement( 0, 5 ),  6 );
-   EXPECT_EQ( m->getElement( 0, 6 ),  7 );
-
-   EXPECT_EQ( m->getElement( 1, 0 ), 22 );
-   EXPECT_EQ( m->getElement( 1, 1 ), 22 );
-   EXPECT_EQ( m->getElement( 1, 2 ), 22 );
-   EXPECT_EQ( m->getElement( 1, 3 ), 22 );
-   EXPECT_EQ( m->getElement( 1, 4 ), 22 );
-   EXPECT_EQ( m->getElement( 1, 5 ), 13 );
-   EXPECT_EQ( m->getElement( 1, 6 ), 14 );
-
-   EXPECT_EQ( m->getElement( 2, 0 ), 15 );
-   EXPECT_EQ( m->getElement( 2, 1 ), 16 );
-   EXPECT_EQ( m->getElement( 2, 2 ), 33 );
-   EXPECT_EQ( m->getElement( 2, 3 ), 33 );
-   EXPECT_EQ( m->getElement( 2, 4 ), 33 );
-   EXPECT_EQ( m->getElement( 2, 5 ), 33 );
-   EXPECT_EQ( m->getElement( 2, 6 ), 33 );
+   EXPECT_EQ( m.getElement( 0, 0 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 2 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 3 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 4 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  6 );
+   EXPECT_EQ( m.getElement( 0, 6 ),  7 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 5 ), 13 );
+   EXPECT_EQ( m.getElement( 1, 6 ), 14 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 15 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 16 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 33 );
 }
 
 template< typename Matrix >
@@ -648,49 +644,49 @@ void test_AddRow()
    const IndexType rows = 6;
    const IndexType cols = 5;
 
-   TNL::Pointers::SharedPointer< Matrix > m( rows, cols );
+   Matrix m( rows, cols );
 
    RealType value = 1;
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < cols; j++ )
-         m->setElement( i, j, value++ );
+         m.setElement( i, j, value++ );
 
    // Check the added elements
-   EXPECT_EQ( m->getElement( 0, 0 ),  1 );
-   EXPECT_EQ( m->getElement( 0, 1 ),  2 );
-   EXPECT_EQ( m->getElement( 0, 2 ),  3 );
-   EXPECT_EQ( m->getElement( 0, 3 ),  4 );
-   EXPECT_EQ( m->getElement( 0, 4 ),  5 );
-
-   EXPECT_EQ( m->getElement( 1, 0 ),  6 );
-   EXPECT_EQ( m->getElement( 1, 1 ),  7 );
-   EXPECT_EQ( m->getElement( 1, 2 ),  8 );
-   EXPECT_EQ( m->getElement( 1, 3 ),  9 );
-   EXPECT_EQ( m->getElement( 1, 4 ), 10 );
-
-   EXPECT_EQ( m->getElement( 2, 0 ), 11 );
-   EXPECT_EQ( m->getElement( 2, 1 ), 12 );
-   EXPECT_EQ( m->getElement( 2, 2 ), 13 );
-   EXPECT_EQ( m->getElement( 2, 3 ), 14 );
-   EXPECT_EQ( m->getElement( 2, 4 ), 15 );
-
-   EXPECT_EQ( m->getElement( 3, 0 ), 16 );
-   EXPECT_EQ( m->getElement( 3, 1 ), 17 );
-   EXPECT_EQ( m->getElement( 3, 2 ), 18 );
-   EXPECT_EQ( m->getElement( 3, 3 ), 19 );
-   EXPECT_EQ( m->getElement( 3, 4 ), 20 );
-
-   EXPECT_EQ( m->getElement( 4, 0 ), 21 );
-   EXPECT_EQ( m->getElement( 4, 1 ), 22 );
-   EXPECT_EQ( m->getElement( 4, 2 ), 23 );
-   EXPECT_EQ( m->getElement( 4, 3 ), 24 );
-   EXPECT_EQ( m->getElement( 4, 4 ), 25 );
-
-   EXPECT_EQ( m->getElement( 5, 0 ), 26 );
-   EXPECT_EQ( m->getElement( 5, 1 ), 27 );
-   EXPECT_EQ( m->getElement( 5, 2 ), 28 );
-   EXPECT_EQ( m->getElement( 5, 3 ), 29 );
-   EXPECT_EQ( m->getElement( 5, 4 ), 30 );
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  4 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  5 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  9 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 10 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 11 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 15 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 20 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 21 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 22 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 23 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 26 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 27 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 28 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 29 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 30 );
 
    // Add new elements to the old elements with a multiplying factor applied to the old elements.
    /*
@@ -704,26 +700,7 @@ void test_AddRow()
     *    \ 78 81 84 87 90 /
     */
 
-    RealType row0 [ 5 ] = { 11, 11, 11, 11, 0 }; IndexType colIndexes0 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row1 [ 5 ] = { 22, 22, 22, 22, 0 }; IndexType colIndexes1 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row2 [ 5 ] = { 33, 33, 33, 33, 0 }; IndexType colIndexes2 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row3 [ 5 ] = { 44, 44, 44, 44, 0 }; IndexType colIndexes3 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row4 [ 5 ] = { 55, 55, 55, 55, 0 }; IndexType colIndexes4 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row5 [ 5 ] = { 66, 66, 66, 66, 0 }; IndexType colIndexes5 [ 5 ] = { 0, 1, 2, 3, 4 };
-    
-    IndexType row = 0;
-    IndexType elements = 5;
-    RealType thisRowMultiplicator = 0;
-    
-    // TODO: Fix this
-    /*m.addRow( row++, colIndexes0, row0, elements, thisRowMultiplicator++ );
-    m.addRow( row++, colIndexes1, row1, elements, thisRowMultiplicator++ );
-    m.addRow( row++, colIndexes2, row2, elements, thisRowMultiplicator++ );
-    m.addRow( row++, colIndexes3, row3, elements, thisRowMultiplicator++ );
-    m.addRow( row++, colIndexes4, row4, elements, thisRowMultiplicator++ );
-    m.addRow( row++, colIndexes5, row5, elements, thisRowMultiplicator++ );*/
-
-   Matrix* m_ptr = &m.template modifyData< DeviceType >();
+   auto matrix_view = m.getView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
       RealType values[ 6 ][ 5 ] {
          { 11, 11, 11, 11, 0 },
@@ -732,52 +709,51 @@ void test_AddRow()
          { 44, 44, 44, 44, 0 },
          { 55, 55, 55, 55, 0 },
          { 66, 66, 66, 66, 0 } };
-      auto row = m_ptr->getRow( rowIdx );
+      auto row = matrix_view.getRow( rowIdx );
       for( IndexType i = 0; i < 5; i++ )
       {
          RealType& val = row.getValue( i );
          val = rowIdx * val + values[ rowIdx ][ i ];
       }
    };
-   TNL::Pointers::synchronizeSmartPointersOnDevice< DeviceType >();
    TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f );
 
     
-    EXPECT_EQ( m->getElement( 0, 0 ),  11 );
-    EXPECT_EQ( m->getElement( 0, 1 ),  11 );
-    EXPECT_EQ( m->getElement( 0, 2 ),  11 );
-    EXPECT_EQ( m->getElement( 0, 3 ),  11 );
-    EXPECT_EQ( m->getElement( 0, 4 ),   0 );
-    
-    EXPECT_EQ( m->getElement( 1, 0 ),  28 );
-    EXPECT_EQ( m->getElement( 1, 1 ),  29 );
-    EXPECT_EQ( m->getElement( 1, 2 ),  30 );
-    EXPECT_EQ( m->getElement( 1, 3 ),  31 );
-    EXPECT_EQ( m->getElement( 1, 4 ),  10 );
-    
-    EXPECT_EQ( m->getElement( 2, 0 ),  55 );
-    EXPECT_EQ( m->getElement( 2, 1 ),  57 );
-    EXPECT_EQ( m->getElement( 2, 2 ),  59 );
-    EXPECT_EQ( m->getElement( 2, 3 ),  61 );
-    EXPECT_EQ( m->getElement( 2, 4 ),  30 );
-    
-    EXPECT_EQ( m->getElement( 3, 0 ),  92 );
-    EXPECT_EQ( m->getElement( 3, 1 ),  95 );
-    EXPECT_EQ( m->getElement( 3, 2 ),  98 );
-    EXPECT_EQ( m->getElement( 3, 3 ), 101 );
-    EXPECT_EQ( m->getElement( 3, 4 ),  60 );
-    
-    EXPECT_EQ( m->getElement( 4, 0 ), 139 );
-    EXPECT_EQ( m->getElement( 4, 1 ), 143 );
-    EXPECT_EQ( m->getElement( 4, 2 ), 147 );
-    EXPECT_EQ( m->getElement( 4, 3 ), 151 );
-    EXPECT_EQ( m->getElement( 4, 4 ), 100 );
-    
-    EXPECT_EQ( m->getElement( 5, 0 ), 196 );
-    EXPECT_EQ( m->getElement( 5, 1 ), 201 );
-    EXPECT_EQ( m->getElement( 5, 2 ), 206 );
-    EXPECT_EQ( m->getElement( 5, 3 ), 211 );
-    EXPECT_EQ( m->getElement( 5, 4 ), 150 );
+    EXPECT_EQ( m.getElement( 0, 0 ),  11 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  11 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  11 );
+    EXPECT_EQ( m.getElement( 0, 3 ),  11 );
+    EXPECT_EQ( m.getElement( 0, 4 ),   0 );
+    
+    EXPECT_EQ( m.getElement( 1, 0 ),  28 );
+    EXPECT_EQ( m.getElement( 1, 1 ),  29 );
+    EXPECT_EQ( m.getElement( 1, 2 ),  30 );
+    EXPECT_EQ( m.getElement( 1, 3 ),  31 );
+    EXPECT_EQ( m.getElement( 1, 4 ),  10 );
+    
+    EXPECT_EQ( m.getElement( 2, 0 ),  55 );
+    EXPECT_EQ( m.getElement( 2, 1 ),  57 );
+    EXPECT_EQ( m.getElement( 2, 2 ),  59 );
+    EXPECT_EQ( m.getElement( 2, 3 ),  61 );
+    EXPECT_EQ( m.getElement( 2, 4 ),  30 );
+    
+    EXPECT_EQ( m.getElement( 3, 0 ),  92 );
+    EXPECT_EQ( m.getElement( 3, 1 ),  95 );
+    EXPECT_EQ( m.getElement( 3, 2 ),  98 );
+    EXPECT_EQ( m.getElement( 3, 3 ), 101 );
+    EXPECT_EQ( m.getElement( 3, 4 ),  60 );
+    
+    EXPECT_EQ( m.getElement( 4, 0 ), 139 );
+    EXPECT_EQ( m.getElement( 4, 1 ), 143 );
+    EXPECT_EQ( m.getElement( 4, 2 ), 147 );
+    EXPECT_EQ( m.getElement( 4, 3 ), 151 );
+    EXPECT_EQ( m.getElement( 4, 4 ), 100 );
+    
+    EXPECT_EQ( m.getElement( 5, 0 ), 196 );
+    EXPECT_EQ( m.getElement( 5, 1 ), 201 );
+    EXPECT_EQ( m.getElement( 5, 2 ), 206 );
+    EXPECT_EQ( m.getElement( 5, 3 ), 211 );
+    EXPECT_EQ( m.getElement( 5, 4 ), 150 );
 }
 
 template< typename Matrix >
@@ -1263,8 +1239,6 @@ void test_SaveAndLoad()
     EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 14 );
     EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 );
     EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 );
-    
-    std::cout << "\nThis will create a file called '" << TEST_FILE_NAME << "' (of the matrix created in the test function), in .../tnl-dev/Debug/bin/\n\n";
 }
 
 template< typename Matrix >
@@ -1432,12 +1406,12 @@ TYPED_TEST( MatrixTest, addRowTest )
     test_AddRow< MatrixType >();
 }
 
-/*TYPED_TEST( MatrixTest, vectorProductTest )
+TYPED_TEST( MatrixTest, vectorProductTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
     
     test_VectorProduct< MatrixType >();
-}*/
+}
 
 TYPED_TEST( MatrixTest, addMatrixTest )
 {
-- 
GitLab