From 7f24c2edffe7ba7c98fc7090f511f48d3bccf10c Mon Sep 17 00:00:00 2001
From: Lukas Cejka <lukas.ostatek@gmail.com>
Date: Mon, 3 Dec 2018 23:53:30 +0100
Subject: [PATCH] Changed implementation of method. Committing for backup
 purposes.

---
 src/TNL/Matrices/CSR_impl.h       | 16 ++-----
 src/TNL/Matrices/SparseRow_impl.h | 69 ++++++++++++++++---------------
 2 files changed, 40 insertions(+), 45 deletions(-)

diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h
index 82cc1e82ee..de95c0d78d 100644
--- a/src/TNL/Matrices/CSR_impl.h
+++ b/src/TNL/Matrices/CSR_impl.h
@@ -135,10 +135,9 @@ template< typename Real,
           typename Device,
           typename Index >
 Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
-{
+{    
     ConstMatrixRow matrixRow = this->getRow( row );
-    IndexType count = matrixRow.getNonZeroElementsCount( TNL::String( Device::getDeviceType() ) );
-//    return count;
+    return matrixRow.getNonZeroElementsCount( TNL::String( Device::getDeviceType() ) );
     // getRow() was throwing segmentation faults.
     // FOR THIS TO WORK, I had to change getRow() from [ rowIndex ] to .getElement( rowIndex ).
     
@@ -459,19 +458,12 @@ typename CSR< Real, Device, Index >::ConstMatrixRow
 CSR< Real, Device, Index >::
 getRow( const IndexType rowIndex ) const
 {
-    const IndexType rowOffset = this->rowPointers.getElement( rowIndex );
-    const IndexType rowLength = this->rowPointers.getElement( rowIndex + 1 ) - rowOffset;
+    const IndexType rowOffset = this->rowPointers[ rowIndex ];
+    const IndexType rowLength = this->rowPointers[ rowIndex + 1 ] - rowOffset;
     return ConstMatrixRow( &this->columnIndexes[ rowOffset ],
                            &this->values[ rowOffset ],
                            rowLength,
                            1 );
-    
-//   const IndexType rowOffset = this->rowPointers[ rowIndex ];
-//   const IndexType rowLength = this->rowPointers[ rowIndex + 1 ] - rowOffset;
-//   return ConstMatrixRow( &this->columnIndexes[ rowOffset ],
-//                          &this->values[ rowOffset ],
-//                          rowLength,
-//                          1 );
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/SparseRow_impl.h b/src/TNL/Matrices/SparseRow_impl.h
index f87194df08..14888669dd 100644
--- a/src/TNL/Matrices/SparseRow_impl.h
+++ b/src/TNL/Matrices/SparseRow_impl.h
@@ -118,9 +118,6 @@ Index
 SparseRow< Real, Index >::
 getNonZeroElementsCount( TNL::String deviceType ) const
 {
-    using CudaType = typename TNL::Devices::Cuda;
-    using HostType = typename TNL::Devices::Host;
-    
     using NonConstIndex = typename std::remove_const< Index >::type;
     
     // If this is static, it will trigger a illegal memory address
@@ -128,41 +125,47 @@ getNonZeroElementsCount( TNL::String deviceType ) const
     NonConstIndex elementCount ( 0 );
     
     
-    // elementCount = 0; // Only if it is static. Make sure it is reset. Without this seemingly useless step, it returned incorrect values.
+//    using CudaType = typename TNL::Devices::Cuda;
+//    using HostType = typename TNL::Devices::Host;
+//    
+//    
+//    // elementCount = 0; // Only if it is static. Make sure it is reset. Without this seemingly useless step, it returned incorrect values.
+//    
+//    // PROBLEM: Lambda function with __cuda_callable__ CANNOT pass values by reference!!
+//    // INCORRECT ASSUMPTION!! PROBLEM: Lambda function which takes in anything via capture list, cannot return anything. (Maybe don't capture anything? pass this->values by parameter and return count?)
+//        // WRONG: https://stackoverflow.com/questions/38835154/lambda-function-capture-a-variable-vs-return-value?fbclid=IwAR0ybDD83LRWxkJsrcoSmGW2mbsMfhywmdZQkleqyjU-NOIwqkz8woihfXs
+//    auto computeNonZeros = [=] __cuda_callable__ ( NonConstIndex i /*, NonConstIndex *elementCount*/ ) mutable
+//    {
+//        //std::cout << "this->values[ i * step ] = " << this->values[ i * step ] << " != 0.0/n";
+//        if( this->values[ i * step ] != 0.0 )
+//            elementCount++;//*elementCount++;
+//        
+//        //std::cout << "End of lambda elementCount = " << elementCount << "/n";
+//        //return elementCount;
+//    };
+//    
+//    
+//    // Decide which ParallelFor will be executed, either Host or Cuda.
+//    if( deviceType == TNL::String( "Devices::Host" ) )
+//    {
+//        ParallelFor< HostType >::exec( ( NonConstIndex ) 0, length, computeNonZeros /*, &elementCount*/ );
+//    }
+//    
+//    else if( deviceType == TNL::String( "Cuda" ) )
+//    {
+//        ParallelFor< CudaType >::exec( ( NonConstIndex ) 0, length, computeNonZeros /*, &elementCount*/ );
+//    }
+   
     
-    // PROBLEM: Lambda function with __cuda_callable__ CANNOT pass values by reference!!
-    // PROBLEM: Lambda function which takes in anything via capture list, cannot return anything. (Maybe dont capture anything? pass this->values by parameter and return count?)
-    auto computeNonZeros = [=] __cuda_callable__ ( NonConstIndex i ) mutable
+    // THE FOLLOWING doesn't work on GPU
+    for( NonConstIndex i = 0; i < length; i++ )
     {
-        //std::cout << "this->values[ i * step ] = " << this->values[ i * step ] << " != 0.0/n";
-        if( this->values[ i * step ] != 0.0 )
+        std::cout << "this->values[ i * step ] = " << this->values[ i * step ] << " != 0.0" << std::endl;
+        if( this->values[ i * step ] != 0.0 ) // Returns the same amount of elements in a row as does getRowLength() in ChunkedEllpack. WHY?
             elementCount++;
-        
-        //std::cout << "End of lambda elementCount = " << elementCount << "/n";
-    };
-    
-    
-    // Decide which ParallelFor will be executed, either Host or Cuda.
-    if( deviceType == TNL::String( "Devices::Host" ) )
-    {
-        ParallelFor< HostType >::exec( ( NonConstIndex ) 0, length, computeNonZeros );
     }
     
-    else if( deviceType == TNL::String( "Cuda" ) )
-    {
-        ParallelFor< CudaType >::exec( ( NonConstIndex ) 0, length, computeNonZeros );
-    }
-   
-    
-//    // THE FOLLOWING doesn't work on GPU
-//    for( NonConstIndex i = 0; i < length; i++ )
-//    {
-//        std::cout << "this->values[ i * step ] = " << this->values[ i * step ] << " != 0.0" << std::endl;
-//        if( this->values[ i * step ] != 0.0 ) // Returns the same amount of elements in a row as does getRowLength() in ChunkedEllpack. WHY?
-//            elementCount++;
-//    }
-    
-    // std::cout << "Element Count = " << elementCount << "\n";
+     std::cout << "Element Count = " << elementCount << "\n";
     
     return elementCount;
 }
-- 
GitLab