Commit 3fc82675 authored by Lukas Cejka's avatar Lukas Cejka
Browse files

Commented out the bodies of getNonZeroRowLength() and getNonZeroElementsCount() and the associated CUDA kernel. To be implemented.

parent 21a0bdcd
Loading
Loading
Loading
Loading
+37 −35
Original line number Diff line number Diff line
@@ -136,40 +136,42 @@ template< typename Real,
          typename Index >
Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
{
   // Returns the number of non-zero elements of the given row.
   //
   //  - Host:  fetches the row with getRow() and returns its
   //           getNonZeroElementsCount().
   //  - Cuda:  experimental path. It does NOT look at `row`; it builds a dummy
   //           4-element row, launches getNonZeroRowLengthCudaKernel with a
   //           single thread and copies the result back to the host.
   //  - Other: falls through to the TNL_ASSERT( false, ... ) below.
   //
   // \param row  index of the matrix row to inspect (only used on the host path).
   // \return     count reported by ConstMatrixRow::getNonZeroElementsCount().
   if( std::is_same< DeviceType, Devices::Host >::value )
   {
      ConstMatrixRow matrixRow = this->getRow( row );
      return matrixRow.getNonZeroElementsCount();
   }
   if( std::is_same< DeviceType, Devices::Cuda >::value )
   {
      // Placeholder data: both buffers are allocated with plain `new`, i.e. in
      // host memory, and filled with columns 0..3 and values 1.0.
      IndexType *cols = new IndexType[4];
      RealType *vals = new RealType[4];
      for( int i = 0; i < 4; i++ )
      {
          cols[i] = i;
          vals[i] = 1.0;
      }
      // NOTE(review): matrixRow is built from the host pointers above and is
      // later passed by value to a __global__ kernel. If the row object only
      // stores these raw pointers (to be confirmed against the ConstMatrixRow
      // definition), the kernel dereferences host addresses on the device, which
      // would explain the "illegal memory address" reported at passFromDevice below.
      ConstMatrixRow matrixRow(cols, vals, 4, 1);
//      ConstMatrixRow matrixRow = this->getRow( row );// If the program even compiles, this line fails because a segfault is thrown on the first line of getRow()
      // WHEN debugging with GDB:
      //  (gdb) p this->rowPointers[0]
      //    Could not find operator[].
      //  (gdb) p rowPointers.getElement(0)
      //    Attempt to take address of value not located in memory.
      IndexType resultHost ( 0 );
      // Device-side copy of resultHost that the kernel writes into.
      IndexType* resultCuda = Devices::Cuda::passToDevice( resultHost );
      // PROBLEM: If the second parameter of getNonZeroRowLengthCudaKernel is '&resultCuda', the following issue is thrown:
      //          'error: no instance of function template "TNL::Matrices::getNonZeroRowLengthCudaKernel" matches the argument list'
      // Explanation: resultCuda is already an IndexType*; '&resultCuda' is an
      // IndexType**, which cannot bind to the kernel's `Index* result` parameter
      // when Index is explicitly given as IndexType.
      TNL::Matrices::getNonZeroRowLengthCudaKernel< ConstMatrixRow, IndexType ><<< 1, 1 >>>( matrixRow, resultCuda ); // matrixRow works fine, tested them both separately
      // NOTE(review): no explicit synchronization is visible between the kernel
      // launch and these delete[] calls - confirm the buffers are not needed by
      // the kernel any more at this point.
      delete []cols;
      delete []vals;
      // Debugging checkpoints around the device-to-host copy.
      std::cout << "Checkpoint BEFORE passFromDevice" << std::endl;
      resultHost = Devices::Cuda::passFromDevice( resultCuda ); // This causes a crash: Illegal memory address in Cuda_impl.h at TNL_CHECK_CUDA_DEVICE
      std::cout << "Checkpoint AFTER passFromDevice" << std::endl;
      Devices::Cuda::freeFromDevice( resultCuda );
      return resultHost;
   }
    // TODO: Fix/Implement
    // NOTE(review): with the two branches above active, this is reached only for
    // a DeviceType that is neither Host nor Cuda. No return statement follows, so
    // if TNL_ASSERT does not abort (e.g. is compiled out - to be confirmed),
    // control flows off the end of a function that must return Index.
    TNL_ASSERT( false, std::cerr << "TODO: Fix/Implement" );
    // The commented-out code below duplicates the active branches above.
//    if( std::is_same< DeviceType, Devices::Host >::value )
//    {
//       ConstMatrixRow matrixRow = this->getRow( row );
//       return matrixRow.getNonZeroElementsCount();
//    }
//    if( std::is_same< DeviceType, Devices::Cuda >::value )
//    {
//       IndexType *cols = new IndexType[4];
//       RealType *vals = new RealType[4];
//       for( int i = 0; i < 4; i++ )
//       {
//           cols[i] = i;
//           vals[i] = 1.0;
//       }
//       ConstMatrixRow matrixRow(cols, vals, 4, 1);
// //      ConstMatrixRow matrixRow = this->getRow( row );// If the program even compiles, this line fails because a segfault is thrown on the first line of getRow()
//       // WHEN debugging with GDB:
//       //  (gdb) p this->rowPointers[0]
//       //    Could not find operator[].
//       //  (gdb) p rowPointers.getElement(0)
//       //    Attempt to take address of value not located in memory.
//       IndexType resultHost ( 0 );
//       IndexType* resultCuda = Devices::Cuda::passToDevice( resultHost );
//       // PROBLEM: If the second parameter of getNonZeroRowLengthCudaKernel is '&resultCuda', the following issue is thrown:
//       //          'error: no instance of function template "TNL::Matrices::getNonZeroRowLengthCudaKernel" matches the argument list'
//       TNL::Matrices::getNonZeroRowLengthCudaKernel< ConstMatrixRow, IndexType ><<< 1, 1 >>>( matrixRow, resultCuda ); // matrixRow works fine, tested them both separately
//       delete []cols;
//       delete []vals;
//       std::cout << "Checkpoint BEFORE passFromDevice" << std::endl;
//       resultHost = Devices::Cuda::passFromDevice( resultCuda ); // This causes a crash: Illegal memory address in Cuda_impl.h at TNL_CHECK_CUDA_DEVICE
//       std::cout << "Checkpoint AFTER passFromDevice" << std::endl;
//       Devices::Cuda::freeFromDevice( resultCuda );
//       return resultHost;
//   }
}

template< typename Real,
+23 −19
Original line number Diff line number Diff line
@@ -117,11 +117,13 @@ template< typename MatrixRow, typename Index >
__global__
void getNonZeroRowLengthCudaKernel( const MatrixRow row, Index* result )
{
   // CUDA kernel that stores row.getNonZeroElementsCount() into *result.
   //
   // \param row     matrix row object, received by value.
   // \param result  location the count is written to; the kernel dereferences it,
   //                so it must be an address the device can write.
   //
   // Only the thread with global index 0 performs the write; any other launched
   // thread skips the if-block.
   int threadId = blockIdx.x * blockDim.x + threadIdx.x;
   if( threadId == 0 )
   {
      *result = row.getNonZeroElementsCount();
   }
//    TODO: Fix/Implement
    // NOTE(review): this assertion is outside the threadId check, so every thread
    // evaluates it after the write above. Its message expression uses std::cerr -
    // confirm TNL_ASSERT accepts that inside device code.
    TNL_ASSERT( false, std::cerr << "TODO: Fix/Implement" );
    // The commented-out code below duplicates the active statements above.
//    int threadId = blockIdx.x * blockDim.x + threadIdx.x;
//    if( threadId == 0 )
//    {
//       *result = row.getNonZeroElementsCount();
//    }
}
#endif

@@ -131,20 +133,22 @@ Index
SparseRow< Real, Index >::
getNonZeroElementsCount() const
{
    // Counts the entries of this row whose stored value differs from 0.0.
    //
    // The row is scanned at positions values[ i * step ] for i in [0, length).
    // Entries whose value compares equal to 0.0 are not counted.
    //
    // \return number of scanned entries with a value != 0.0.

    // Index may be const-qualified; strip the qualifier so the counter and the
    // loop variable can be modified.
    using NonConstIndex = typename std::remove_const< Index >::type;
    
    NonConstIndex elementCount ( 0 );
   
    for( NonConstIndex i = 0; i < length; i++ )
    {
//        std::cout << "this->values[ i * step ] = " << this->values[ i * step ] << " != 0.0" << std::endl;
        if( this->values[ i * step ] != 0.0 ) // Returns the same amount of elements in a row as does getRowLength() in ChunkedEllpack. WHY?
            elementCount++;
    }
    
//    std::cout << "Element Count = " << elementCount << "\n";
    
    return elementCount;
//    TODO: Fix/Implement
    // NOTE(review): as listed, this assertion follows the return statement above
    // and is therefore unreachable. If the active code above is removed so that
    // only the assertion remains, the function has no return statement left.
    TNL_ASSERT( false, std::cerr << "TODO: Fix/Implement" );
    // The commented-out code below duplicates the active implementation above.
//    using NonConstIndex = typename std::remove_const< Index >::type;
//    
//    NonConstIndex elementCount ( 0 );
//   
//    for( NonConstIndex i = 0; i < length; i++ )
//    {
////        std::cout << "this->values[ i * step ] = " << this->values[ i * step ] << " != 0.0" << std::endl;
//        if( this->values[ i * step ] != 0.0 ) // Returns the same amount of elements in a row as does getRowLength() in ChunkedEllpack. WHY?
//            elementCount++;
//    }
//    
////    std::cout << "Element Count = " << elementCount << "\n";
//    
//    return elementCount;
}

template< typename Real, typename Index >