Commit 991d31a0 authored by Lukas Cejka, committed by Tomáš Oberhuber
Browse files

Reformatted getNonZeroRowLength to pass on the device Type in string and thus...

Reformatted getNonZeroRowLength to pass on the device Type in string and thus enable the lambda in SparseRow_impl.h
parent a9b95034
Loading
Loading
Loading
Loading
+35 −8
Original line number Diff line number Diff line
@@ -136,8 +136,28 @@ template< typename Real,
          typename Index >
Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
{
    // Counts the non-zero elements stored in matrix row `row`.
    //
    // The row is fetched with getRow() and the counting is delegated to the
    // row object; the device type is forwarded to it as a TNL::String.
    //
    // NOTE: getRow() reads rowPointers through getElement() instead of
    // operator[]; with operator[] this call was throwing segmentation faults.
    //
    // NOTE: counting directly here with a __cuda_callable__ lambda executed by
    // ParallelFor< DeviceType >::exec() was tried and abandoned, because it
    // failed with `identifier "" is undefined in device code`
    // (reported at ParallelFor.h, line 92).
    ConstMatrixRow matrixRow = this->getRow( row );

    // The result must be returned explicitly: flowing off the end of a
    // function with a non-void return type is undefined behaviour.
    return matrixRow.getNonZeroElementsCount( TNL::String( Device::getDeviceType() ) );
}

template< typename Real,
@@ -439,12 +459,19 @@ typename CSR< Real, Device, Index >::ConstMatrixRow
CSR< Real, Device, Index >::
getRow( const IndexType rowIndex ) const
{
   const IndexType rowOffset = this->rowPointers[ rowIndex ];
   const IndexType rowLength = this->rowPointers[ rowIndex + 1 ] - rowOffset;
    const IndexType rowOffset = this->rowPointers.getElement( rowIndex );
    const IndexType rowLength = this->rowPointers.getElement( rowIndex + 1 ) - rowOffset;
    return ConstMatrixRow( &this->columnIndexes[ rowOffset ],
                           &this->values[ rowOffset ],
                           rowLength,
                           1 );
    
//   const IndexType rowOffset = this->rowPointers[ rowIndex ];
//   const IndexType rowLength = this->rowPointers[ rowIndex + 1 ] - rowOffset;
//   return ConstMatrixRow( &this->columnIndexes[ rowOffset ],
//                          &this->values[ rowOffset ],
//                          rowLength,
//                          1 );
}

template< typename Real,
+18 −3
Original line number Diff line number Diff line
@@ -198,8 +198,8 @@ bool ChunkedEllpack< Real, Device, Index >::setSlice( ConstCompressedRowLengthsV
//       will be zero and make the assertion fail ( https://stackoverflow.com/questions/33273359/in-c-using-the-ceil-a-division-is-not-working ).
//       To fix this, typecast them to ( double ), instead of ( RealType )
       maxChunkInSlice = max( maxChunkInSlice,
                          ceil( ( float ) rowLengths[ i ] /
                                ( float ) this->rowToChunkMapping[ i ] ) );
                          ceil( ( double ) rowLengths[ i ] /
                                ( double ) this->rowToChunkMapping[ i ] ) );
   }
      TNL_ASSERT( maxChunkInSlice > 0,
              std::cerr << " maxChunkInSlice = " << maxChunkInSlice << std::endl );
@@ -314,7 +314,22 @@ template< typename Real,
Index ChunkedEllpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
{
    // Counts the non-zero elements stored in matrix row `row` by delegating
    // to the row object, which is given the device type.
    //
    // NOTE: a host-side lambda executed by ParallelFor< DeviceType >::exec()
    // was prototyped here as an alternative way of counting; it is not used.
    ConstMatrixRow matrixRow = getRow( row );
    return matrixRow.getNonZeroElementsCount( Device::getDeviceType() );
}

template< typename Real,
+1 −1
Original line number Diff line number Diff line
@@ -129,7 +129,7 @@ template< typename Real,
Index Ellpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
{
    // Counts the non-zero elements stored in matrix row `row` by delegating
    // to the row object, which is given the device type.
    ConstMatrixRow matrixRow = getRow( row );
    return matrixRow.getNonZeroElementsCount( Device::getDeviceType() );
}

template< typename Real,
+1 −1
Original line number Diff line number Diff line
@@ -128,7 +128,7 @@ template< typename Real,
Index SlicedEllpack< Real, Device, Index, SliceSize >::getNonZeroRowLength( const IndexType row ) const
{
    // Counts the non-zero elements stored in matrix row `row` by delegating
    // to the row object, which is given the device type.
    ConstMatrixRow matrixRow = getRow( row );
    return matrixRow.getNonZeroElementsCount( Device::getDeviceType() );
}

template< typename Real,
+1 −1
Original line number Diff line number Diff line
@@ -53,7 +53,7 @@ class SparseRow
      Index getLength() const;
      
      __cuda_callable__
      Index getNonZeroElementsCount() const;
      Index getNonZeroElementsCount( TNL::String deviceType ) const;

      void print( std::ostream& str ) const;

Loading