From 30ecfefaec80f1953b3c64fd32ab6269d5966932 Mon Sep 17 00:00:00 2001
From: Lukas Cejka <lukas.ostatek@gmail.com>
Date: Wed, 5 Dec 2018 18:04:25 +0100
Subject: [PATCH] Moved getNonZeroRowLengthCudaKernel to SparseRow_impl.h and
 identified errors in SparseRow_impl.h. Committing for backup purposes.

---
 src/TNL/Matrices/CSR_impl.h       | 40 +++++++++++--------------------
 src/TNL/Matrices/SparseRow_impl.h | 24 ++++++++++---------
 2 files changed, 27 insertions(+), 37 deletions(-)

diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h
index ad24fc699b..41ff15857b 100644
--- a/src/TNL/Matrices/CSR_impl.h
+++ b/src/TNL/Matrices/CSR_impl.h
@@ -131,20 +131,6 @@ Index CSR< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
    return this->rowPointers[ row + 1 ] - this->rowPointers[ row ];
 }
 
-#ifdef HAVE_CUDA
-// TODO: move to SparseRow
-template< typename MatrixRow, typename Index >
-__global__
-void getNonZeroRowLengthCudaKernel( const MatrixRow row, Index* result )
-{
-   int threadId = blockIdx.x * blockDim.x + threadIdx.x;
-   if( threadId == 0 )
-   {
-      *result = row.getNonZeroElementsCount();
-   }
-}
-#endif
-
 template< typename Real,
           typename Device,
           typename Index >
@@ -158,27 +144,29 @@ Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) con
    if( std::is_same< DeviceType, Devices::Cuda >::value )
    {
       IndexType *cols = new IndexType[4];
-      std::cout << "crash1" << std::endl;
       RealType *vals = new RealType[4];
-      std::cout << "crash2" << std::endl;
       for( int i = 0; i < 4; i++ )
       {
           cols[i] = i;
           vals[i] = 1.0;
       }
-      std::cout << "crash3" << std::endl;
-      ConstMatrixRow matrixRow(cols, vals, 4, 1); // = this->getRow( row ); // If the program even compiles, this line fails because a segfault is thrown on the first line of getRow()
-      std::cout << "crash4" << std::endl;
+      ConstMatrixRow matrixRow(cols, vals, 4, 1);
+//      ConstMatrixRow matrixRow = this->getRow( row );// If the program even compiles, this line fails because a segfault is thrown on the first line of getRow()
+      // WHEN debugging with GDB:
+      //  (gdb) p this->rowPointers[0]
+      //    Could not find operator[].
+      //  (gdb) p rowPointers.getElement(0)
+      //    Attempt to take address of value not located in memory.
       IndexType resultHost ( 0 );
       IndexType* resultCuda = Devices::Cuda::passToDevice( resultHost );
-      std::cout << "resultCuda = " << resultCuda << std::endl;
-      // PROBLEM: If thee second parameter of getNonZeroRowLengthCudaKernel is '&resultCuda', the following issue is thrown:
+      // PROBLEM: If the second parameter of getNonZeroRowLengthCudaKernel is '&resultCuda', the following issue is thrown:
       //          'error: no instance of function template "TNL::Matrices::getNonZeroRowLengthCudaKernel" matches the argument list'
-      /*TNL::Matrices::*/getNonZeroRowLengthCudaKernel< ConstMatrixRow, IndexType ><<< 1, 1 >>>( matrixRow, resultCuda ); // matrixRow works fine, tested them both separately
-      std::cout << "resultCuda = " << resultCuda << std::endl;
-      std::cout << "crash5" << std::endl;
-      resultHost = Devices::Cuda::passFromDevice( resultCuda ); // This causes a crash: Illegal memory address.
-      std::cout << "crash6" << std::endl;
+      TNL::Matrices::getNonZeroRowLengthCudaKernel< ConstMatrixRow, IndexType ><<< 1, 1 >>>( matrixRow, resultCuda ); // matrixRow works fine, tested them both separately
+      delete []cols;
+      delete []vals;
+      std::cout << "Checkpoint BEFORE passFromDevice" << std::endl;
+      resultHost = Devices::Cuda::passFromDevice( resultCuda ); // This causes a crash: Illegal memory address in Cuda_impl.h at TNL_CHECK_CUDA_DEVICE
+      std::cout << "Checkpoint AFTER passFromDevice" << std::endl;
       Devices::Cuda::freeFromDevice( resultCuda );
       return resultHost;
    }
diff --git a/src/TNL/Matrices/SparseRow_impl.h b/src/TNL/Matrices/SparseRow_impl.h
index 3157b6c963..cd36abf6ce 100644
--- a/src/TNL/Matrices/SparseRow_impl.h
+++ b/src/TNL/Matrices/SparseRow_impl.h
@@ -112,16 +112,18 @@ getLength() const
    return length;
 }
 
-//template< typename MatrixRow >
-//__global__ 
-//void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result )
-//{
-//   int threadId = blockIdx.x * blockDim.x + threadIdx.x;
-//   if( threadId == 0 )
-//   {
-//      result = row->getNonZeroElementsCount();
-//   }
-//}
+#ifdef HAVE_CUDA // the kernel below is compiled only when HAVE_CUDA is defined
+template< typename MatrixRow, typename Index >
+__global__ // CUDA kernel: writes row.getNonZeroElementsCount() to *result
+void getNonZeroRowLengthCudaKernel( const MatrixRow row, Index* result ) // row is taken by value, result is an output pointer
+{ // NOTE(review): result and any pointers held inside row are presumably dereferenced on the device - confirm they are device-accessible
+   int threadId = blockIdx.x * blockDim.x + threadIdx.x; // flat thread index along the x dimension
+   if( threadId == 0 ) // only the thread with index 0 stores the result
+   {
+      *result = row.getNonZeroElementsCount();
+   }
+}
+#endif // HAVE_CUDA
 
 template< typename Real, typename Index >
 __cuda_callable__
@@ -140,7 +142,7 @@ getNonZeroElementsCount() const
             elementCount++;
     }
     
-//     std::cout << "Element Count = " << elementCount << "\n";
+//    std::cout << "Element Count = " << elementCount << "\n";
     
     return elementCount;
 }
-- 
GitLab