diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index c469927234cd835bef7bcfe36599a47cb843b6cc..3fc6d890899096f3f4f4716f27d1f68856a2786e 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -10,6 +10,7 @@
 
 #pragma once
 
+#include <TNL/Allocators/Default.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Matrices/Matrix.h>
 #include <TNL/Matrices/DenseRow.h>
@@ -23,7 +24,9 @@ class DenseDeviceDependentCode;
 
 template< typename Real = double,
           typename Device = Devices::Host,
-          typename Index = int >
+          typename Index = int,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
 class Dense : public Matrix< Real, Device, Index >
 {
 private:
@@ -32,17 +35,17 @@ private:
    using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
 
    // friend class will be needed for templated assignment operators
-   template< typename Real2, typename Device2, typename Index2 >
-   friend class Dense;
+   //template< typename Real2, typename Device2, typename Index2 >
+   //friend class Dense;
 
 public:
-   typedef Real RealType;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-   typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector;
-   typedef typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
-   typedef Matrix< Real, Device, Index > BaseType;
-   typedef DenseRow< Real, Index > MatrixRow;
+   using RealType = Real;
+   using DeviceType = Device;
+   using IndexType = Index;
+   using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector;
+   using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView;
+   using BaseType = Matrix< Real, Device, Index >;
+   using MatrixRow = DenseRow< Real, Index >;
 
    template< typename _Real = Real,
              typename _Device = Device,
@@ -58,23 +61,17 @@ public:
    void setDimensions( const IndexType rows,
                        const IndexType columns );
 
-   template< typename Real2, typename Device2, typename Index2 >
-   void setLike( const Dense< Real2, Device2, Index2 >& matrix );
+   template< typename Matrix >
+   void setLike( const Matrix& matrix );
 
    /****
     * This method is only for the compatibility with the sparse matrices.
     */
    void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
-   /****
-    * Returns maximal number of the nonzero matrix elements that can be stored
-    * in a given row.
-    */
+   [[deprecated]]
    IndexType getRowLength( const IndexType row ) const;
 
-   __cuda_callable__
-   IndexType getRowLengthFast( const IndexType row ) const;
-
    IndexType getMaxRowLength() const;
 
    IndexType getNumberOfMatrixElements() const;
@@ -220,4 +217,4 @@ protected:
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/Dense_impl.h>
+#include <TNL/Matrices/Dense.hpp>
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 246bd09edb459e6df9749af9d1589f508c2c5806..70e5018ddeb59af078665a3f8302b24791f12dfe 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -19,15 +19,19 @@ namespace Matrices {
 
 template< typename Real,
           typename Device,
-          typename Index >
-Dense< Real, Device, Index >::Dense()
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::Dense()
 {
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-String Dense< Real, Device, Index >::getSerializationType()
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+String Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getSerializationType()
 {
    return String( "Matrices::Dense< " ) +
           getType< RealType >() + ", " +
@@ -37,16 +41,20 @@ String Dense< Real, Device, Index >::getSerializationType()
 
 template< typename Real,
           typename Device,
-          typename Index >
-String Dense< Real, Device, Index >::getSerializationTypeVirtual() const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+String Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getSerializationTypeVirtual() const
 {
    return this->getSerializationType();
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::setDimensions( const IndexType rows,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setDimensions( const IndexType rows,
                                                   const IndexType columns )
 {
    Matrix< Real, Device, Index >::setDimensions( rows, columns );
@@ -56,59 +64,71 @@ void Dense< Real, Device, Index >::setDimensions( const IndexType rows,
 
 template< typename Real,
           typename Device,
-          typename Index >
-   template< typename Real2,
-             typename Device2,
-             typename Index2 >
-void Dense< Real, Device, Index >::setLike( const Dense< Real2, Device2, Index2 >& matrix )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Matrix_ >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setLike( const Matrix_& matrix )
 {
-   this->setDimensions( matrix.getRows(), matrix.getColumns() );
+   Matrix< Real, Device, Index, RealAllocator >::setLike( matrix );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
 {
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Dense< Real, Device, Index >::getRowLength( const IndexType row ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getRowLength( const IndexType row ) const
 {
    return this->getColumns();
 }
 
-template< typename Real,
+/*template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-Index Dense< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
+Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getRowLengthFast( const IndexType row ) const
 {
    return this->getColumns();
-}
+}*/
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Dense< Real, Device, Index >::getMaxRowLength() const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getMaxRowLength() const
 {
    return this->getColumns();
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Dense< Real, Device, Index >::getNumberOfMatrixElements() const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getNumberOfMatrixElements() const
 {
    return this->getRows() * this->getColumns();
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Dense< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getNumberOfNonzeroMatrixElements() const
 {
    IndexType nonzeroElements( 0 );
    for( IndexType row = 0; row < this->getRows(); row++ )
@@ -120,8 +140,10 @@ Index Dense< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::reset()
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::reset()
 {
    Matrix< Real, Device, Index >::reset();
    this->values.reset();
@@ -129,8 +151,10 @@ void Dense< Real, Device, Index >::reset()
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::setValue( const Real& value )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setValue( const Real& value )
 {
    this->values.setValue( value );
 }
@@ -138,9 +162,11 @@ void Dense< Real, Device, Index >::setValue( const Real& value )
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-Real& Dense< Real, Device, Index >::operator()( const IndexType row,
+Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator()( const IndexType row,
                                                 const IndexType column )
 {
    TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
@@ -153,9 +179,11 @@ Real& Dense< Real, Device, Index >::operator()( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-const Real& Dense< Real, Device, Index >::operator()( const IndexType row,
+const Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator()( const IndexType row,
                                                       const IndexType column ) const
 {
    TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
@@ -169,9 +197,11 @@ const Real& Dense< Real, Device, Index >::operator()( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-bool Dense< Real, Device, Index >::setElementFast( const IndexType row,
+bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setElementFast( const IndexType row,
                                                             const IndexType column,
                                                             const RealType& value )
 {
@@ -186,8 +216,10 @@ bool Dense< Real, Device, Index >::setElementFast( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Dense< Real, Device, Index >::setElement( const IndexType row,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setElement( const IndexType row,
                                                const IndexType column,
                                                const RealType& value )
 {
@@ -198,9 +230,11 @@ bool Dense< Real, Device, Index >::setElement( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-bool Dense< Real, Device, Index >::addElementFast( const IndexType row,
+bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addElementFast( const IndexType row,
                                                    const IndexType column,
                                                    const RealType& value,
                                                    const RealType& thisElementMultiplicator )
@@ -221,8 +255,10 @@ bool Dense< Real, Device, Index >::addElementFast( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Dense< Real, Device, Index >::addElement( const IndexType row,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addElement( const IndexType row,
                                                         const IndexType column,
                                                         const RealType& value,
                                                         const RealType& thisElementMultiplicator )
@@ -240,9 +276,11 @@ bool Dense< Real, Device, Index >::addElement( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-bool Dense< Real, Device, Index >::setRowFast( const IndexType row,
+bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setRowFast( const IndexType row,
                                                         const IndexType* columns,
                                                         const RealType* values,
                                                         const IndexType elements )
@@ -257,8 +295,10 @@ bool Dense< Real, Device, Index >::setRowFast( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Dense< Real, Device, Index >::setRow( const IndexType row,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setRow( const IndexType row,
                                                     const IndexType* columns,
                                                     const RealType* values,
                                                     const IndexType elements )
@@ -273,9 +313,11 @@ bool Dense< Real, Device, Index >::setRow( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-bool Dense< Real, Device, Index >::addRowFast( const IndexType row,
+bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addRowFast( const IndexType row,
                                                         const IndexType* columns,
                                                         const RealType* values,
                                                         const IndexType elements,
@@ -292,8 +334,10 @@ bool Dense< Real, Device, Index >::addRowFast( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Dense< Real, Device, Index >::addRow( const IndexType row,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addRow( const IndexType row,
                                                     const IndexType* columns,
                                                     const RealType* values,
                                                     const IndexType elements,
@@ -311,9 +355,11 @@ bool Dense< Real, Device, Index >::addRow( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-const Real& Dense< Real, Device, Index >::getElementFast( const IndexType row,
+const Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementFast( const IndexType row,
                                                           const IndexType column ) const
 {
    TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
@@ -326,8 +372,10 @@ const Real& Dense< Real, Device, Index >::getElementFast( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
-Real Dense< Real, Device, Index >::getElement( const IndexType row,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Real Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElement( const IndexType row,
                                                         const IndexType column ) const
 {
    return this->values.getElement( this->getElementIndex( row, column ) );
@@ -335,9 +383,11 @@ Real Dense< Real, Device, Index >::getElement( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-void Dense< Real, Device, Index >::getRowFast( const IndexType row,
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getRowFast( const IndexType row,
                                                         IndexType* columns,
                                                         RealType* values ) const
 {
@@ -350,10 +400,12 @@ void Dense< Real, Device, Index >::getRowFast( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-typename Dense< Real, Device, Index >::MatrixRow
-Dense< Real, Device, Index >::
+typename Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getRow( const IndexType rowIndex )
 {
    if( std::is_same< Device, Devices::Host >::value )
@@ -368,10 +420,12 @@ getRow( const IndexType rowIndex )
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-const typename Dense< Real, Device, Index >::MatrixRow
-Dense< Real, Device, Index >::
+const typename Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getRow( const IndexType rowIndex ) const
 {
    if( std::is_same< Device, Devices::Host >::value )
@@ -386,10 +440,12 @@ getRow( const IndexType rowIndex ) const
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Vector >
 __cuda_callable__
-typename Vector::RealType Dense< Real, Device, Index >::rowVectorProduct( const IndexType row,
+typename Vector::RealType Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::rowVectorProduct( const IndexType row,
                                                                                    const Vector& vector ) const
 {
    RealType sum( 0.0 );
@@ -400,10 +456,12 @@ typename Vector::RealType Dense< Real, Device, Index >::rowVectorProduct( const
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename InVector,
              typename OutVector >
-void Dense< Real, Device, Index >::vectorProduct( const InVector& inVector,
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct( const InVector& inVector,
                                                            OutVector& outVector ) const
 {
    TNL_ASSERT( this->getColumns() == inVector.getSize(),
@@ -418,9 +476,11 @@ void Dense< Real, Device, Index >::vectorProduct( const InVector& inVector,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Matrix >
-void Dense< Real, Device, Index >::addMatrix( const Matrix& matrix,
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addMatrix( const Matrix& matrix,
                                               const RealType& matrixMultiplicator,
                                               const RealType& thisMatrixMultiplicator )
 {
@@ -440,6 +500,8 @@ void Dense< Real, Device, Index >::addMatrix( const Matrix& matrix,
 #ifdef HAVE_CUDA
 template< typename Real,
           typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
           typename Matrix1,
           typename Matrix2,
           int tileDim,
@@ -538,9 +600,11 @@ __global__ void DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index >* r
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Matrix1, typename Matrix2, int tileDim >
-void Dense< Real, Device, Index >::getMatrixProduct( const Matrix1& matrix1,
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getMatrixProduct( const Matrix1& matrix1,
                                                               const Matrix2& matrix2,
                                                               const RealType& matrix1Multiplicator,
                                                               const RealType& matrix2Multiplicator )
@@ -628,6 +692,8 @@ void Dense< Real, Device, Index >::getMatrixProduct( const Matrix1& matrix1,
 template< typename Real,
           typename Index,
           typename Matrix,
+          bool RowMajorOrder,
+          typename RealAllocator,
           int tileDim,
           int tileRowBlockSize >
 __global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix,
@@ -696,6 +762,8 @@ __global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Ind
 
 template< typename Real,
           typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
           typename Matrix,
           int tileDim,
           int tileRowBlockSize >
@@ -776,9 +844,11 @@ __global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Matrix, int tileDim >
-void Dense< Real, Device, Index >::getTransposition( const Matrix& matrix,
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getTransposition( const Matrix& matrix,
                                                               const RealType& matrixMultiplicator )
 {
    TNL_ASSERT( this->getColumns() == matrix.getRows() &&
@@ -867,9 +937,11 @@ void Dense< Real, Device, Index >::getTransposition( const Matrix& matrix,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Vector1, typename Vector2 >
-void Dense< Real, Device, Index >::performSORIteration( const Vector1& b,
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORIteration( const Vector1& b,
                                                         const IndexType row,
                                                         Vector2& x,
                                                         const RealType& omega ) const
@@ -889,9 +961,11 @@ void Dense< Real, Device, Index >::performSORIteration( const Vector1& b,
 // copy assignment
 template< typename Real,
           typename Device,
-          typename Index >
-Dense< Real, Device, Index >&
-Dense< Real, Device, Index >::operator=( const Dense& matrix )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Dense& matrix )
 {
    this->setLike( matrix );
    this->values = matrix.values;
@@ -901,10 +975,12 @@ Dense< Real, Device, Index >::operator=( const Dense& matrix )
 // cross-device copy assignment
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Real2, typename Device2, typename Index2, typename >
-Dense< Real, Device, Index >&
-Dense< Real, Device, Index >::operator=( const Dense< Real2, Device2, Index2 >& matrix )
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Dense< Real2, Device2, Index2 >& matrix )
 {
    static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
                   "unknown device" );
@@ -919,40 +995,50 @@ Dense< Real, Device, Index >::operator=( const Dense< Real2, Device2, Index2 >&
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::save( const String& fileName ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::save( const String& fileName ) const
 {
    Object::save( fileName );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::load( const String& fileName )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::load( const String& fileName )
 {
    Object::load( fileName );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::save( File& file ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::save( File& file ) const
 {
    Matrix< Real, Device, Index >::save( file );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::load( File& file )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::load( File& file )
 {
    Matrix< Real, Device, Index >::load( file );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::print( std::ostream& str ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::print( std::ostream& str ) const
 {
    for( IndexType row = 0; row < this->getRows(); row++ )
    {
@@ -965,9 +1051,11 @@ void Dense< Real, Device, Index >::print( std::ostream& str ) const
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-Index Dense< Real, Device, Index >::getElementIndex( const IndexType row,
+Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementIndex( const IndexType row,
                                                               const IndexType column ) const
 {
    TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value ||
@@ -988,9 +1076,11 @@ class DenseDeviceDependentCode< Devices::Host >
 
       template< typename Real,
                 typename Index,
+                bool RowMajorOrder,
+                typename RealAllocator,
                 typename InVector,
                 typename OutVector >
-      static void vectorProduct( const Dense< Real, Device, Index >& matrix,
+      static void vectorProduct( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix,
                                  const InVector& inVector,
                                  OutVector& outVector )
       {
@@ -1011,9 +1101,11 @@ class DenseDeviceDependentCode< Devices::Cuda >
 
       template< typename Real,
                 typename Index,
+                bool RowMajorOrder,
+                typename RealAllocator,
                 typename InVector,
                 typename OutVector >
-      static void vectorProduct( const Dense< Real, Device, Index >& matrix,
+      static void vectorProduct( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix,
                                  const InVector& inVector,
                                  OutVector& outVector )
       {
diff --git a/src/TNL/Matrices/DistributedSpMV.h b/src/TNL/Matrices/DistributedSpMV.h
index b2abd13c537dc181de638caec4b6adf06755b2bf..e5b2e9008b9a4a65cdf9d7cdfbce84a3fc3fe226 100644
--- a/src/TNL/Matrices/DistributedSpMV.h
+++ b/src/TNL/Matrices/DistributedSpMV.h
@@ -19,6 +19,7 @@
 #include <vector>
 #include <utility>  // std::pair
 #include <limits>   // std::numeric_limits
+#include <TNL/Allocators/Host.h>
 #include <TNL/Matrices/Dense.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/VectorView.h>
@@ -235,7 +236,7 @@ public:
 
 protected:
    // communication pattern
-   Matrices::Dense< IndexType, Devices::Host, int > commPatternStarts, commPatternEnds;
+   Matrices::Dense< IndexType, Devices::Host, int, true, Allocators::Host< IndexType > > commPatternStarts, commPatternEnds;
 
    // span of rows with only block-diagonal entries
    std::pair< IndexType, IndexType > localOnlySpan;
diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index 8fc8cb5f28eed526099a86afb22574e5819d27b0..a9b458d7b735cdf8b5fda2217bfe32cfd8cbbf28 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -61,8 +61,8 @@ public:
 
    virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
 
-   template< typename Real2, typename Device2, typename Index2, typename RealAllocator2 >
-   void setLike( const Matrix< Real2, Device2, Index2, RealAllocator2 >& matrix );
+   template< typename Matrix_ >
+   void setLike( const Matrix_& matrix );
 
    IndexType getNumberOfMatrixElements() const;
 
diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp
index 9fc5ea620862391092edc54775092af744f1a1d2..29226cb00fa72d968481bb565bdc6bd8302e4083 100644
--- a/src/TNL/Matrices/Matrix.hpp
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -81,11 +81,8 @@ template< typename Real,
           typename Device,
           typename Index,
           typename RealAllocator >
-   template< typename Real2,
-             typename Device2,
-             typename Index2,
-             typename RealAllocator2 >
-void Matrix< Real, Device, Index, RealAllocator >::setLike( const Matrix< Real2, Device2, Index2, RealAllocator2 >& matrix )
+   template< typename Matrix_ >
+void Matrix< Real, Device, Index, RealAllocator >::setLike( const Matrix_& matrix )
 {
    setDimensions( matrix.getRows(), matrix.getColumns() );
 }
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 0e3484c1090ebcdbcb8e858cffd9d4e30ad738e3..8f96af169cce7e7f54b528d58a1be6a2328102e1 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -94,14 +94,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       [[deprecated]]
       virtual IndexType getRowLength( const IndexType row ) const {};
 
-      template< typename Real_,
-                typename Device_,
-                typename Index_,
-                typename MatrixType_,
-                template< typename, typename, typename > class Segments_,
-                typename RealAllocator_,
-                typename IndexAllocator_ >
-      void setLike( const SparseMatrix< Real_, Device_, Index_, MatrixType_, Segments_, RealAllocator_, IndexAllocator_ >& matrix );
+      template< typename Matrix >
+      void setLike( const Matrix& matrix );
 
       IndexType getNumberOfNonzeroMatrixElements() const;
 
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 72184738b623760e36d59e0f82026e1672506016..6c0655ce0122f32d884ee037f681f896f2b84511 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -202,10 +202,10 @@ template< typename Real,
           template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
-   template< typename Real2, typename Device2, typename Index2, typename MatrixType2, template< typename, typename, typename > class Segments2, typename RealAllocator2, typename IndexAllocator2 >
+   template< typename Matrix_ >
 void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-setLike( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >& matrix )
+setLike( const Matrix_& matrix )
 {
    Matrix< Real, Device, Index, RealAllocator >::setLike( matrix );
 }