Commit c11fb108 authored by Lukas Cejka, committed by Tomáš Oberhuber
Browse files

Commented out the bodies of getNonZeroRowLength() and getNonZeroElementsCount() and of the associated CUDA kernel getNonZeroRowLengthCudaKernel(). To be implemented.

parent 5e419f95
Loading
Loading
Loading
Loading
+37 −35
Original line number Diff line number Diff line
@@ -136,40 +136,42 @@ template< typename Real,
          typename Index >
Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
{
   // Returns the number of non-zero values found in row `row`, choosing the
   // implementation by DeviceType (Host or Cuda).
   // NOTE(review): the live statements directly below and the commented-out copy
   // after the TNL_ASSERT are the same code. This looks like a rendered diff in
   // which the upper half is the removed body and the lower half its
   // replacement -- confirm which half is current before relying on either.

   // Host: fetch the row and let it count its own non-zero values.
   if( std::is_same< DeviceType, Devices::Host >::value )
   {
      ConstMatrixRow matrixRow = this->getRow( row );
      return matrixRow.getNonZeroElementsCount();
   }
   // CUDA: experimental path. It does not read the requested row; it builds a
   // fixed 4-entry test row (columns 0..3, every value 1.0) and counts that.
   if( std::is_same< DeviceType, Devices::Cuda >::value )
   {
      IndexType *cols = new IndexType[4];
      RealType *vals = new RealType[4];
      for( int i = 0; i < 4; i++ )
      {
          cols[i] = i;
          vals[i] = 1.0;
      }
      // NOTE(review): cols/vals are host allocations from new[]. If
      // ConstMatrixRow only stores these pointers, the kernel dereferences host
      // memory on the device, which would fit the illegal-address error
      // reported at passFromDevice below -- confirm.
      ConstMatrixRow matrixRow(cols, vals, 4, 1);
//      ConstMatrixRow matrixRow = this->getRow( row );// If the program even compiles, this line fails because a segfault is thrown on the first line of getRow()
      // WHEN debugging with GDB:
      //  (gdb) p this->rowPointers[0]
      //    Could not find operator[].
      //  (gdb) p rowPointers.getElement(0)
      //    Attempt to take address of value not located in memory.
      // resultCuda is the pointer handed to the kernel as its output slot; it is
      // read back with passFromDevice and released with freeFromDevice below.
      IndexType resultHost ( 0 );
      IndexType* resultCuda = Devices::Cuda::passToDevice( resultHost );
      // PROBLEM: If the second parameter of getNonZeroRowLengthCudaKernel is '&resultCuda', the following issue is thrown:
      //          'error: no instance of function template "TNL::Matrices::getNonZeroRowLengthCudaKernel" matches the argument list'
      // Launched with a single block of a single thread.
      TNL::Matrices::getNonZeroRowLengthCudaKernel< ConstMatrixRow, IndexType ><<< 1, 1 >>>( matrixRow, resultCuda ); // matrixRow works fine, tested them both separately
      // NOTE(review): the host arrays are released right after the launch and
      // before the result is read back. Kernel launches are asynchronous with
      // respect to the host, so the kernel may still be running here -- confirm.
      delete []cols;
      delete []vals;
      std::cout << "Checkpoint BEFORE passFromDevice" << std::endl;
      resultHost = Devices::Cuda::passFromDevice( resultCuda ); // This causes a crash: Illegal memory address in Cuda_impl.h at TNL_CHECK_CUDA_DEVICE
      std::cout << "Checkpoint AFTER passFromDevice" << std::endl;
      Devices::Cuda::freeFromDevice( resultCuda );
      return resultHost;
   }
    // TODO: Fix/Implement
    // NOTE(review): no return statement follows this assertion. If TNL_ASSERT
    // does not abort (e.g. assertions compiled out), the function ends without
    // returning a value -- confirm and add an explicit return.
    TNL_ASSERT( false, std::cerr << "TODO: Fix/Implement" );
//    if( std::is_same< DeviceType, Devices::Host >::value )
//    {
//       ConstMatrixRow matrixRow = this->getRow( row );
//       return matrixRow.getNonZeroElementsCount();
//    }
//    if( std::is_same< DeviceType, Devices::Cuda >::value )
//    {
//       IndexType *cols = new IndexType[4];
//       RealType *vals = new RealType[4];
//       for( int i = 0; i < 4; i++ )
//       {
//           cols[i] = i;
//           vals[i] = 1.0;
//       }
//       ConstMatrixRow matrixRow(cols, vals, 4, 1);
// //      ConstMatrixRow matrixRow = this->getRow( row );// If the program even compiles, this line fails because a segfault is thrown on the first line of getRow()
//       // WHEN debugging with GDB:
//       //  (gdb) p this->rowPointers[0]
//       //    Could not find operator[].
//       //  (gdb) p rowPointers.getElement(0)
//       //    Attempt to take address of value not located in memory.
//       IndexType resultHost ( 0 );
//       IndexType* resultCuda = Devices::Cuda::passToDevice( resultHost );
//       // PROBLEM: If the second parameter of getNonZeroRowLengthCudaKernel is '&resultCuda', the following issue is thrown:
//       //          'error: no instance of function template "TNL::Matrices::getNonZeroRowLengthCudaKernel" matches the argument list'
//       TNL::Matrices::getNonZeroRowLengthCudaKernel< ConstMatrixRow, IndexType ><<< 1, 1 >>>( matrixRow, resultCuda ); // matrixRow works fine, tested them both separately
//       delete []cols;
//       delete []vals;
//       std::cout << "Checkpoint BEFORE passFromDevice" << std::endl;
//       resultHost = Devices::Cuda::passFromDevice( resultCuda ); // This causes a crash: Illegal memory address in Cuda_impl.h at TNL_CHECK_CUDA_DEVICE
//       std::cout << "Checkpoint AFTER passFromDevice" << std::endl;
//       Devices::Cuda::freeFromDevice( resultCuda );
//       return resultHost;
//   }
}

template< typename Real,
+23 −19
Original line number Diff line number Diff line
@@ -117,11 +117,13 @@ template< typename MatrixRow, typename Index >
__global__
void getNonZeroRowLengthCudaKernel( const MatrixRow row, Index* result )
{
   // CUDA kernel: the thread whose global index is 0 stores
   // row.getNonZeroElementsCount() into *result; every other thread does
   // nothing. `row` is received by value, `result` is the output slot.
   // NOTE(review): the live statements below and the commented-out copy after
   // the TNL_ASSERT are the same code. This looks like a rendered diff (removed
   // body followed by its replacement) -- confirm which half is current.
   int threadId = blockIdx.x * blockDim.x + threadIdx.x;
   if( threadId == 0 )
   {
      *result = row.getNonZeroElementsCount();
   }
//    TODO: Fix/Implement
    // NOTE(review): std::cerr is a host-side stream. Whether TNL_ASSERT may be
    // given it inside a __global__ function depends on the macro's definition,
    // which is not visible here -- confirm this compiles for device code.
    TNL_ASSERT( false, std::cerr << "TODO: Fix/Implement" );
//    int threadId = blockIdx.x * blockDim.x + threadIdx.x;
//    if( threadId == 0 )
//    {
//       *result = row.getNonZeroElementsCount();
//    }
}
#endif

@@ -131,20 +133,22 @@ Index
SparseRow< Real, Index >::
getNonZeroElementsCount() const
{
    // Counts the entries of this row whose stored value differs from 0.0.
    // It inspects `length` entries, reading entry i from values[ i * step ].
    // NOTE(review): the live statements below and the commented-out copy after
    // the TNL_ASSERT are the same code. This looks like a rendered diff (removed
    // body followed by its replacement) -- confirm which half is current.

    // Index may be const-qualified; strip the qualifier so the counter and the
    // loop index can be modified.
    using NonConstIndex = typename std::remove_const< Index >::type;
    
    NonConstIndex elementCount ( 0 );
   
    for( NonConstIndex i = 0; i < length; i++ )
    {
//        std::cout << "this->values[ i * step ] = " << this->values[ i * step ] << " != 0.0" << std::endl;
        if( this->values[ i * step ] != 0.0 ) // Returns the same amount of elements in a row as does getRowLength() in ChunkedEllpack. WHY?
            elementCount++;
    }
    
//    std::cout << "Element Count = " << elementCount << "\n";
    
    return elementCount;
//    TODO: Fix/Implement
    // NOTE(review): as written this assertion follows an unconditional return
    // and can never execute. If it is meant to replace the body above, the
    // function would then end without returning a value -- confirm.
    TNL_ASSERT( false, std::cerr << "TODO: Fix/Implement" );
//    using NonConstIndex = typename std::remove_const< Index >::type;
//    
//    NonConstIndex elementCount ( 0 );
//   
//    for( NonConstIndex i = 0; i < length; i++ )
//    {
////        std::cout << "this->values[ i * step ] = " << this->values[ i * step ] << " != 0.0" << std::endl;
//        if( this->values[ i * step ] != 0.0 ) // Returns the same amount of elements in a row as does getRowLength() in ChunkedEllpack. WHY?
//            elementCount++;
//    }
//    
////    std::cout << "Element Count = " << elementCount << "\n";
//    
//    return elementCount;
}

template< typename Real, typename Index >