Commit c22813ae authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Merge branch 'matrices' into 'develop'

Matrices

See merge request !16
parents a6f3ee4d 9cf89e17
Loading
Loading
Loading
Loading
+17 −6
Original line number Diff line number Diff line
@@ -13,6 +13,9 @@
#include <TNL/Matrices/Sparse.h>
#include <TNL/Containers/Vector.h>

#include <TNL/Devices/Cuda.h>
#include <TNL/Exceptions/CudaBadAlloc.h>

namespace TNL {
namespace Matrices {
   
@@ -41,17 +44,21 @@ private:

public:

   typedef Real RealType;
   typedef Device DeviceType;
   typedef Index IndexType;
   using RealType = Real;
   using DeviceType = Device;
   using IndexType = Index;
   typedef typename Sparse< RealType, DeviceType, IndexType >:: CompressedRowLengthsVector CompressedRowLengthsVector;
   typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
   typedef CSR< Real, Device, Index > ThisType;
   typedef CSR< Real, Devices::Host, Index > HostType;
   typedef CSR< Real, Devices::Cuda, Index > CudaType;
   typedef Sparse< Real, Device, Index > BaseType;
   typedef typename BaseType::MatrixRow MatrixRow;
   typedef SparseRow< const RealType, const IndexType > ConstMatrixRow;
   //typedef typename BaseType::MatrixRow MatrixRow;
   
   using MatrixRow = typename BaseType::MatrixRow;
   using ConstMatrixRow = typename BaseType::ConstMatrixRow;
   //using typename BaseType::ConstMatrixRow;
   //typedef SparseRow< const RealType, const IndexType > ConstMatrixRow;


   enum SPMVCudaKernel { scalar, vector, hybrid };
@@ -76,6 +83,10 @@ public:
   __cuda_callable__
   IndexType getRowLengthFast( const IndexType row ) const;
   
   IndexType getNonZeroRowLength( const IndexType row ) const;
   
   IndexType getNonZeroRowLengthFast( const IndexType row ) const;
   
   template< typename Real2, typename Device2, typename Index2 >
   void setLike( const CSR< Real2, Device2, Index2 >& matrix );

+60 −6
Original line number Diff line number Diff line
@@ -131,6 +131,60 @@ Index CSR< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
   return this->rowPointers[ row + 1 ] - this->rowPointers[ row ];
}

template< typename Real,
          typename Device,
          typename Index >
Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
{
    // TODO: Fix/Implement
    TNL_ASSERT( false, std::cerr << "TODO: Fix/Implement" );
    return 0;
//    if( std::is_same< DeviceType, Devices::Host >::value )
//    {
//       ConstMatrixRow matrixRow = this->getRow( row );
//       return matrixRow.getNonZeroElementsCount();
//    }
//    if( std::is_same< DeviceType, Devices::Cuda >::value )
//    {
//       IndexType *cols = new IndexType[4];
//       RealType *vals = new RealType[4];
//       for( int i = 0; i < 4; i++ )
//       {
//           cols[i] = i;
//           vals[i] = 1.0;
//       }
//       ConstMatrixRow matrixRow(cols, vals, 4, 1);
// //      ConstMatrixRow matrixRow = this->getRow( row );// If the program even compiles, this line fails because a segfault is thrown on the first line of getRow()
//       // WHEN debugging with GDB:
//       //  (gdb) p this->rowPointers[0]
//       //    Could not find operator[].
//       //  (gdb) p rowPointers.getElement(0)
//       //    Attempt to take address of value not located in memory.
//       IndexType resultHost ( 0 );
//       IndexType* resultCuda = Devices::Cuda::passToDevice( resultHost );
//       // PROBLEM: If the second parameter of getNonZeroRowLengthCudaKernel is '&resultCuda', the following issue is thrown:
//       //          'error: no instance of function template "TNL::Matrices::getNonZeroRowLengthCudaKernel" matches the argument list'
//       TNL::Matrices::getNonZeroRowLengthCudaKernel< ConstMatrixRow, IndexType ><<< 1, 1 >>>( matrixRow, resultCuda ); // matrixRow works fine, tested them both separately
//       delete []cols;
//       delete []vals;
//       std::cout << "Checkpoint BEFORE passFromDevice" << std::endl;
//       resultHost = Devices::Cuda::passFromDevice( resultCuda ); // This causes a crash: Illegal memory address in Cuda_impl.h at TNL_CHECK_CUDA_DEVICE
//       std::cout << "Checkpoint AFTER passFromDevice" << std::endl;
//       Devices::Cuda::freeFromDevice( resultCuda );
//       return resultHost;
//   }
}

template< typename Real,
          typename Device,
          typename Index >
__cuda_callable__
Index CSR< Real, Device, Index >::getNonZeroRowLengthFast( const IndexType row ) const
{  
   ConstMatrixRow matrixRow = this->getRow( row );
   return matrixRow.getNonZeroElementsCount();
}

template< typename Real,
          typename Device,
          typename Index >
+2 −0
Original line number Diff line number Diff line
@@ -105,6 +105,8 @@ public:
   __cuda_callable__
   IndexType getRowLengthFast( const IndexType row ) const;
   
   IndexType getNonZeroRowLength( const IndexType row ) const;

   template< typename Real2, typename Device2, typename Index2 >
   void setLike( const ChunkedEllpack< Real2, Device2, Index2 >& matrix );

+58 −8
Original line number Diff line number Diff line
@@ -179,9 +179,27 @@ bool ChunkedEllpack< Real, Device, Index >::setSlice( ConstCompressedRowLengthsV
    */
   IndexType maxChunkInSlice( 0 );
   for( IndexType i = sliceBegin; i < sliceEnd; i++ )
   {       
//       ALL OF THE FOLLOWING std::couts are for troubleshooting purposes, can be deleted.
//       std::cout << "Troubleshooting invalid ceil operation: " << std::endl;
//       std::cout << "maxChunkInSlice = " << maxChunkInSlice << std::endl;
//       std::cout << "( RealType ) rowLengths[ i ] = " <<  ( RealType ) rowLengths[ i ] << std::endl;
//       std::cout << "( RealType ) this->rowToChunkMapping[ i ] = " <<  ( RealType ) this->rowToChunkMapping[ i ] << std::endl;
//       std::cout << " ceil( RealType / RealType ) = " << ceil( ( RealType ) rowLengths[ i ] / ( RealType ) this->rowToChunkMapping[ i ] ) << std::endl;
//       std::cout << "( int ) rowLengths[ i ] = " <<  ( int ) rowLengths[ i ] << std::endl;
//       std::cout << "( int ) this->rowToChunkMapping[ i ] = " <<  ( int ) this->rowToChunkMapping[ i ] << std::endl;
//       std::cout << " ceil( int / int ) = " << ceil( ( int ) rowLengths[ i ] / ( int ) this->rowToChunkMapping[ i ] ) << std::endl;
//       std::cout << "( float ) rowLengths[ i ] = " <<  ( float ) rowLengths[ i ] << std::endl;
//       std::cout << "( float ) this->rowToChunkMapping[ i ] = " <<  ( float ) this->rowToChunkMapping[ i ] << std::endl;
//       std::cout << " ceil( float / float ) = " << ceil( ( float ) rowLengths[ i ] / ( float ) this->rowToChunkMapping[ i ] ) << std::endl;
//       The ceil function doesn't work when rowLengths and the other this.->... is 
//       typecasted into ( RealType ), because when RealType is int, it will perform 
//       an integer division and return the int as a double, which in this case 
//       will be zero and make the assertion fail ( https://stackoverflow.com/questions/33273359/in-c-using-the-ceil-a-division-is-not-working ).
//       To fix this, typecast them to ( float ), instead of ( RealType )
       maxChunkInSlice = max( maxChunkInSlice,
                          ceil( ( RealType ) rowLengths[ i ] /
                                ( RealType ) this->rowToChunkMapping[ i ] ) );
                          roundUpDivision( rowLengths[ i ], this->rowToChunkMapping[ i ] ) );
   }
      TNL_ASSERT( maxChunkInSlice > 0,
              std::cerr << " maxChunkInSlice = " << maxChunkInSlice << std::endl );

@@ -232,6 +250,13 @@ void ChunkedEllpack< Real, Device, Index >::setCompressedRowLengths( ConstCompre
      this->rowPointers.computePrefixSum();
   }
   
//   std::cout << "\ngetRowLength after first if: " << std::endl;
//   for( IndexType i = 0; i < rowLengths.getSize(); i++ )
//   {
//       std::cout << getRowLength( i ) << std::endl;
//   }
//   std::cout << "\n";

   if( std::is_same< Device, Devices::Cuda >::value )
   {
      ChunkedEllpack< RealType, Devices::Host, IndexType > hostMatrix;
@@ -255,6 +280,7 @@ void ChunkedEllpack< Real, Device, Index >::setCompressedRowLengths( ConstCompre
      elementsToAllocation = hostMatrix.values.getSize();
   }
   this->maxRowLength = rowLengths.max();
//   std::cout << "\nrowLengths.max() = " << rowLengths.max() << std::endl;
   Sparse< Real, Device, Index >::allocateMatrixElements( elementsToAllocation );
}

@@ -281,6 +307,30 @@ Index ChunkedEllpack< Real, Device, Index >::getRowLengthFast( const IndexType r
   return rowPointers[ row + 1 ] - rowPointers[ row ];
}

template< typename Real,
          typename Device,
          typename Index >
Index ChunkedEllpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
{
    ConstMatrixRow matrixRow = getRow( row );
    return matrixRow.getNonZeroElementsCount( Device::getDeviceType() );
    
//    IndexType elementCount ( 0 );
//    ConstMatrixRow matrixRow = this->getRow( row );
//    
//    auto computeNonZeros = [&] /*__cuda_callable__*/ ( IndexType i ) mutable
//    {
//        std::cout << "matrixRow.getElementValue( i ) = " << matrixRow.getElementValue( i ) << " != 0.0" << std::endl;
//        if( matrixRow.getElementValue( i ) !=  0.0 )
//            elementCount++;
//        
//        std::cout << "End of lambda elementCount = " << elementCount << std::endl;
//    };
//   
//    ParallelFor< DeviceType >::exec( ( IndexType ) 0, matrixRow.getLength(), computeNonZeros );
//    return elementCount;
}

template< typename Real,
          typename Device,
          typename Index >
@@ -952,7 +1002,7 @@ getRow( const IndexType rowIndex ) const
{
   const IndexType rowOffset = this->rowPointers[ rowIndex ];
   const IndexType rowLength = this->rowPointers[ rowIndex + 1 ] - rowOffset;
   return MatrixRow( &this->columnIndexes[ rowOffset ],
   return ConstMatrixRow( &this->columnIndexes[ rowOffset ],
                          &this->values[ rowOffset ],
                          rowLength,
                          1 );
+2 −0
Original line number Diff line number Diff line
@@ -68,6 +68,8 @@ public:
   __cuda_callable__
   IndexType getRowLengthFast( const IndexType row ) const;
   
   IndexType getNonZeroRowLength( const IndexType row ) const;

   template< typename Real2, typename Device2, typename Index2 >
   void setLike( const Ellpack< Real2, Device2, Index2 >& matrix );

Loading