From c2514bc6b8b76a13d73f947ae5d7a67d4d7204c5 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Fri, 17 Jan 2020 17:40:18 +0100
Subject: [PATCH] Debugging assignment of tridiagonal matrix to sparse matrix.

---
 src/TNL/Matrices/SparseMatrix.h               |   2 +-
 src/TNL/Matrices/SparseMatrix.hpp             |  59 +++++++---
 src/TNL/Matrices/TridiagonalMatrixRowView.hpp |   2 +-
 src/TNL/Matrices/TridiagonalMatrixView.h      |   2 +
 src/TNL/Matrices/TridiagonalMatrixView.hpp    |  17 +--
 src/UnitTests/Matrices/SparseMatrixCopyTest.h | 105 +++++++++++++++++-
 6 files changed, 155 insertions(+), 32 deletions(-)

diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 43ea25bf58..26d5d2d840 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -73,7 +73,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
                     const RealAllocatorType& realAllocator = RealAllocatorType(),
                     const IndexAllocatorType& indexAllocator = IndexAllocatorType() );

-      ViewType getView();
+      ViewType getView() const; // TODO: remove const

       ConstViewType getConstView() const;

diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 14495ad3d5..6d3d9d8b8b 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -82,13 +82,13 @@ template< typename Real,
           typename IndexAllocator >
 auto
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-getView() -> ViewType
+getView() const -> ViewType
 {
    return ViewType( this->getRows(),
                     this->getColumns(),
-                    this->getValues().getView(),
-                    this->columnIndexes.getView(),
-                    this->segments.getView() );
+                    const_cast< SparseMatrix* >( this )->getValues().getView(), // TODO: remove const_cast
+                    const_cast< SparseMatrix* >( this )->columnIndexes.getView(),
+                    const_cast< SparseMatrix* >( this )->segments.getView() );
 }

 template< typename Real,
@@ -624,7 +624,8 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >&
       thisValuesBuffer_view = matrixValuesBuffer_view;

       ////
-      // Copy matrix elements from the buffer to the matrix
+      // Copy matrix elements from the buffer to the matrix and ignoring
+      // zero matrix elements.
       const IndexType matrix_columns = this->getColumns();
       auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable {
          RealType inValue( 0.0 );
@@ -672,34 +673,41 @@ operator=( const RHSMatrix& matrix )
    using RHSRealType = typename RHSMatrix::RealType;
    using RHSDeviceType = typename RHSMatrix::DeviceType;
    using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType;
-   using RHSIndexAllocatorType = typename RHSMatrix::IndexAllocatorType;
+   using RHSIndexAllocatorType = typename Allocators::Default< RHSDeviceType >::template Allocator< RHSIndexType >;

-   typename RHSMatrix::RowsCapacitiesType rowLengths;
+   Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths;
    matrix.getCompressedRowLengths( rowLengths );
    this->setDimensions( matrix.getRows(), matrix.getColumns() );
    this->setCompressedRowLengths( rowLengths );

+   Containers::Vector< IndexType, DeviceType, IndexType > rowLocalIndexes( matrix.getRows() );
+   rowLocalIndexes = 0;
+
    // TODO: use getConstView when it works
    const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView();
    const IndexType paddingIndex = this->getPaddingIndex();
    auto columns_view = this->columnIndexes.getView();
    auto values_view = this->values.getView();
+   auto rowLocalIndexes_view = rowLocalIndexes.getView();
    columns_view = paddingIndex;

-   /*if( std::is_same< DeviceType, RHSDeviceType >::value )
+   if( std::is_same< DeviceType, RHSDeviceType >::value )
    {
       const auto segments_view = this->segments.getView();
-      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
+      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
+         RealType inValue( 0.0 );
+         IndexType localIdx( rowLocalIndexes_view[ rowIdx ] );
          if( columnIndex != paddingIndex )
          {
-            IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx );
+            IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx++ );
             columns_view[ thisGlobalIdx ] = columnIndex;
             values_view[ thisGlobalIdx ] = value;
+            rowLocalIndexes_view[ rowIdx ] = localIdx;
          }
       };
       matrix.forAllRows( f );
    }
-   else*/
+   else
    {
       const IndexType maxRowLength = max( rowLengths );
       const IndexType bufferRowsCount( 128 );
@@ -739,14 +747,29 @@ operator=( const RHSMatrix& matrix )
          thisColumnsBuffer_view = matrixColumnsBuffer_view;

          ////
-         // Copy matrix elements from the buffer to the matrix
-         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable {
-            const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
-            const IndexType column = thisColumnsBuffer_view[ bufferIdx ];
-            if( column != paddingIndex )
+         // Copy matrix elements from the buffer to the matrix and ignoring
+         // zero matrix elements
+         const IndexType matrix_columns = this->getColumns();
+         auto matrix_view = matrix.getView();
+         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx_, IndexType& columnIndex, RealType& value, bool& compute ) mutable {
+            RealType inValue( 0.0 );
+            IndexType bufferIdx, localIdx( rowLocalIndexes_view[ rowIdx ] );
+            auto matrixRow = matrix_view.getRow( rowIdx );
+            while( inValue == 0.0 && localIdx < matrixRow.getSize() ) //matrix_columns )
             {
-               columnIndex = column;
-               value = thisValuesBuffer_view[ bufferIdx ];
+               bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx++;
+               inValue = thisValuesBuffer_view[ bufferIdx ];
+            }
+            rowLocalIndexes_view[ rowIdx ] = localIdx;
+            if( inValue == 0.0 )
+            {
+               columnIndex = paddingIndex;
+               value = 0.0;
+            }
+            else
+            {
+               columnIndex = thisColumnsBuffer_view[ bufferIdx ];//column - 1;
+               value = inValue;
             }
          };
          this->forRows( baseRow, lastRow, f2 );
diff --git a/src/TNL/Matrices/TridiagonalMatrixRowView.hpp b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp
index ba60876b9b..80fc1a26d5 100644
--- a/src/TNL/Matrices/TridiagonalMatrixRowView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp
@@ -29,7 +29,7 @@ auto
 TridiagonalMatrixRowView< ValuesView, Indexer >::
 getSize() const -> IndexType
 {
-   return indexer.getRowSize();
+   return indexer.getRowSize( rowIdx );
 }

 template< typename ValuesView, typename Indexer >
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h
index 7db517cbd3..61b005c5a7 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.h
+++ b/src/TNL/Matrices/TridiagonalMatrixView.h
@@ -75,8 +75,10 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
       template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
       bool operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;

+      __cuda_callable__
       RowView getRow( const IndexType& rowIdx );

+      __cuda_callable__
       const RowView getRow( const IndexType& rowIdx ) const;

       void setValue( const RealType& v );
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp
index d8fa6061c3..7fc5fd6b7b 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp
@@ -340,25 +340,26 @@ forRows( IndexType first, IndexType last, Function& function ) const
 {
    const auto values_view = this->values.getConstView();
    const auto indexer = this->indexer;
+   bool compute( true );
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
       if( rowIdx == 0 )
       {
-         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] );
-         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] );
+         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ], compute );
+         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ], compute );
       }
       else if( rowIdx + 1 < indexer.getColumns() )
       {
-         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
-         function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
-         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] );
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
+         function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute );
+         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ], compute );
       }
       else if( rowIdx < indexer.getColumns() )
       {
-         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
-         function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
+         function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute );
      }
      else
-         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
    };
    Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index e9898bb393..b285f5e058 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -15,6 +15,8 @@
 #include <TNL/Matrices/SparseMatrix.h>
 #include <TNL/Matrices/MatrixType.h>
 #include <TNL/Matrices/Dense.h>
+#include <TNL/Matrices/Tridiagonal.h>
+#include <TNL/Matrices/Multidiagonal.h>
 #include <TNL/Containers/Segments/CSR.h>
 #include <TNL/Containers/Segments/Ellpack.h>
 #include <TNL/Containers/Segments/SlicedEllpack.h>
@@ -370,6 +372,7 @@ void testCopyAssignment()

       Matrix2 triDiag2;
       triDiag2 = triDiag1;
+      checkTriDiagMatrix( triDiag1 );
       checkTriDiagMatrix( triDiag2 );
    }
    {
@@ -390,6 +393,7 @@ void testCopyAssignment()

       Matrix2 unevenRowSize2;
       unevenRowSize2 = unevenRowSize1;
+      checkUnevenRowSizeMatrix( unevenRowSize2 );
    }
 }

@@ -435,6 +439,62 @@ void testConversion()
    }
 }

+template< typename Matrix >
+void tridiagonalMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >;
+   using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   TridiagonalHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( rows, i + 2 ); j++ )
+         hostMatrix.setElement( i, j, i + j );
+
+   std::cerr << hostMatrix << std::endl;
+   Matrix matrix;
+   matrix = hostMatrix;
+   std::cerr << matrix << std::endl;
+   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 0, 3, 3, 3, 3, 3, 3, 3, 3, 2 };
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( abs( i - j ) > 1 )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+
+#ifdef HAVE_CUDA
+   TridiagonalCuda cudaMatrix( rows, columns );
+   cudaMatrix = hostMatrix;
+   /*for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( row, i + 1 ); j++ )
+         cudaMatrix.setElement( i, j, i + j );*/
+
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( abs( i - j ) > 1 )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+#endif
+
+}
+
 template< typename Matrix >
 void denseMatrixAssignment()
 {
@@ -469,10 +529,10 @@ void denseMatrixAssignment()

 #ifdef HAVE_CUDA
    DenseCuda cudaMatrix( rows, columns );
-   //cudaMatrix = hostMatrix;
-   for( IndexType i = 0; i < columns; i++ )
+   cudaMatrix = hostMatrix;
+   /*for( IndexType i = 0; i < columns; i++ )
       for( IndexType j = 0; j <= i; j++ )
-         cudaMatrix.setElement( i, j, i + j );
+         cudaMatrix.setElement( i, j, i + j );*/

    matrix = cudaMatrix;
    matrix.getCompressedRowLengths( rowCapacities );
@@ -487,7 +547,7 @@
       }
 #endif
 }
-
+/*
 TEST( SparseMatrixCopyTest, CSR_HostToHost )
 {
    testCopyAssignment< CSR_host, CSR_host >();
@@ -619,6 +679,43 @@ TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda )
    testConversion< SE_cuda, E_cuda >();
 }
 #endif
+*/
+
+////
+// Tridiagonal matrix assignment test
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_host )
+{
+   tridiagonalMatrixAssignment< CSR_host >();
+}
+
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_host )
+{
+   tridiagonalMatrixAssignment< E_host >();
+}
+
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_host )
+{
+   tridiagonalMatrixAssignment< SE_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_cuda )
+{
+   tridiagonalMatrixAssignment< CSR_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_cuda )
+{
+   tridiagonalMatrixAssignment< E_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_cuda )
+{
+   tridiagonalMatrixAssignment< SE_cuda >();
+}
+#endif // HAVE_CUDA
+
+
 // Dense matrix assignment test
 TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_host )
-- 
GitLab
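
For context, a minimal host-only sketch of the conversion that the new TridiagonalMatrixAssignment tests exercise: filling a TNL::Matrices::Tridiagonal matrix and assigning it to a TNL::Matrices::SparseMatrix. It is distilled from tridiagonalMatrixAssignment() above and is an illustration only; it assumes that the remaining SparseMatrix template parameters (matrix type and segments) can be left at their defaults.

#include <iostream>
#include <algorithm>
#include <TNL/Devices/Host.h>
#include <TNL/Matrices/Tridiagonal.h>
#include <TNL/Matrices/SparseMatrix.h>

int main()
{
   using TridiagonalHost = TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, int >;
   // Assumes the default MatrixType and Segments template parameters of SparseMatrix.
   using SparseHost = TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int >;

   // Build a 10x10 tridiagonal matrix with a_ij = i + j on its three diagonals,
   // the same setup the unit test uses.
   const int rows = 10, columns = 10;
   TridiagonalHost hostMatrix( rows, columns );
   for( int i = 0; i < rows; i++ )
      for( int j = std::max( 0, i - 1 ); j < std::min( columns, i + 2 ); j++ )
         hostMatrix.setElement( i, j, i + j );

   // The assignment operator being debugged converts the tridiagonal storage to
   // sparse storage and skips zero elements (here the single entry a_00 = 0).
   SparseHost sparseMatrix;
   sparseMatrix = hostMatrix;

   std::cout << sparseMatrix << std::endl;
   return 0;
}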