Commit 2b9409c8 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Fixes after rebase.

parent 9eb190f2
Loading
Loading
Loading
Loading
+19 −19
Original line number Diff line number Diff line
@@ -100,11 +100,11 @@ benchmarkSpMV( Benchmark & benchmark,
          return false;
      }
    
#ifdef HAVE_CUDA
    // cuSPARSE handle setup
    cusparseHandle_t cusparseHandle;
    cusparseCreate( &cusparseHandle );
    
#ifdef HAVE_CUDA
    // cuSPARSE (in TNL's CSR) only works for device, copy the matrix from host to device
    CSRdeviceMatrix = CSRhostMatrix;
    
@@ -185,9 +185,11 @@ benchmarkSpMV( Benchmark & benchmark,
    auto spmvCuda = [&]() {
       deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
    };
#ifdef HAVE_CUDA
    auto spmvCusparse = [&]() {
        cusparseCSR.vectorProduct( deviceVector, deviceVector2 );
    };
#endif

    benchmark.setOperation( datasetSize );
    benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
@@ -201,18 +203,6 @@ benchmarkSpMV( Benchmark & benchmark,
    // Copy the values
    resultHostVector2 = hostVector2;
    
#ifdef HAVE_CUDA
    benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );

    // Initialize the device vector to be compared.
    //  (The values in deviceVector2 will be reset when spmvCusparse starts)
    HostVector resultDeviceVector2;
    resultDeviceVector2.setSize( deviceVector2.getSize() );
    resultDeviceVector2.setValue( 0.0 );
    
    resultDeviceVector2 = deviceVector2;
#endif
    
    // Set up the cuSPARSE metadata: cuSPARSE has the same header as CSR
    //  and therefore will not get its own headers (rows, cols, speedup etc.) in the log.
    //      * Not setting this up causes (among other undiscovered errors) the speedup from CPU to GPU on the input format to be overwritten.
@@ -225,6 +215,16 @@ benchmarkSpMV( Benchmark & benchmark,
       } ));
    
#ifdef HAVE_CUDA
    benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );

    // Initialize the device vector to be compared.
    //  (The values in deviceVector2 will be reset when spmvCusparse starts)
    HostVector resultDeviceVector2;
    resultDeviceVector2.setSize( deviceVector2.getSize() );
    resultDeviceVector2.setValue( 0.0 );
    
    resultDeviceVector2 = deviceVector2;
    
    benchmark.time< Devices::Cuda >( reset, "GPU", spmvCusparse );
    
    HostVector resultcuSPARSEDeviceVector2;
@@ -232,7 +232,6 @@ benchmarkSpMV( Benchmark & benchmark,
    resultcuSPARSEDeviceVector2.setValue( 0.0 );
    
    resultcuSPARSEDeviceVector2 = deviceVector2;
 #endif
    
    // Difference between GPU (current format) and GPU-cuSPARSE results
    Real cuSparseDifferenceAbsMax = resultDeviceVector2.differenceAbsMax( resultcuSPARSEDeviceVector2 );
@@ -243,6 +242,7 @@ benchmarkSpMV( Benchmark & benchmark,
    
    char *GPUcuSparse_absMax = &GPUxGPUcuSparse_resultDifferenceAbsMax[ 0u ];
    char *GPUcuSparse_lpNorm = &GPUxGPUcuSparse_resultDifferenceLpNorm[ 0u ];
 #endif
    
    
    // Difference between CPU and GPU results for the current format
@@ -277,7 +277,7 @@ benchmarkSpmvSynthetic( Benchmark & benchmark,
   bool result = true;
   result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR );   
   result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR );
   result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName, verboseMR );
   result |= benchmarkSpMV< Real, Matrices::SlicedEllpack >( benchmark, inputFileName, verboseMR );
   result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR );
   
   // AdEllpack is broken
+4 −4
Original line number Diff line number Diff line
@@ -195,7 +195,7 @@ public:
             typename = typename Enabler< Device2 >::type >
    AdEllpack& operator=( const AdEllpack< Real2, Device2, Index2 >& matrix );
    
    bool save( File& file ) const;
    void save( File& file ) const;

    void load( File& file );

@@ -207,13 +207,13 @@ public:

    bool balanceLoad( const RealType average,
                      ConstCompressedRowLengthsVectorView rowLengths,
                      warpList< ThisType >* list );
                      warpList< AdEllpack >* list );

    void computeWarps( const IndexType SMs,
                       const IndexType threadsPerSM,
                       warpList< ThisType >* list );
                       warpList< AdEllpack >* list );

    bool createArrays( warpList< ThisType >* list );
    bool createArrays( warpList< AdEllpack >* list );

    void performRowTest();

+6 −28
Original line number Diff line number Diff line
@@ -162,28 +162,6 @@ AdEllpack< Real, Device, Index >::AdEllpack()
warpSize( 32 )
{}

// Instance-level accessor for the type name; it only forwards to getType().
template< typename Real, typename Device, typename Index >
String
AdEllpack< Real, Device, Index >::getTypeVirtual() const
{
    return AdEllpack::getType();
}

// Builds the textual name of this matrix type in the form
// "Matrices::AdEllpack< <Real>, <Device>, <Index> >". The Real and Index names
// come from TNL::getType, the device name from Device::getDeviceType().
template< typename Real, typename Device, typename Index >
String
AdEllpack< Real, Device, Index >::getType()
{
    const String separator( ", " );
    const String realName( TNL::getType< Real >() );
    const String deviceName( Device::getDeviceType() );
    const String indexName( TNL::getType< Index >() );

    return String( "Matrices::AdEllpack< ")
           + realName + separator
           + deviceName + separator
           + indexName
           + String( " >" );
}

template< typename Real,
          typename Device,
          typename Index >
@@ -204,7 +182,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
        average /= ( RealType ) this->getRows();
        this->totalLoad = average;

        warpList< ThisType >* list = new warpList< ThisType >();
        warpList< AdEllpack >* list = new warpList< AdEllpack >();

        if( !this->balanceLoad( average, rowLengths, list ) )
            throw 0; // TODO: Make better exception
@@ -766,7 +744,7 @@ template< typename Real,
          typename Index >
bool AdEllpack< Real, Device, Index >::balanceLoad( const RealType average,
                                                    ConstCompressedRowLengthsVectorView rowLengths,
                                                    warpList< ThisType >* list )
                                                    warpList< AdEllpack >* list )
{
    IndexType offset, rowOffset, localLoad, reduceMap[ 32 ];

@@ -882,10 +860,10 @@ template< typename Real,
          typename Index >
void AdEllpack< Real, Device, Index >::computeWarps( const IndexType SMs,
                                                     const IndexType threadsPerSM,
                                                     warpList< ThisType >* list )
                                                     warpList< AdEllpack >* list )
{    
    IndexType averageLoad = 0;
    warpInfo< ThisType >* temp = list->getHead()->next;
    warpInfo< AdEllpack >* temp = list->getHead()->next;
    
    while( temp/*->next*/ != list->getTail() )
    {
@@ -918,7 +896,7 @@ void AdEllpack< Real, Device, Index >::computeWarps( const IndexType SMs,
template< typename Real,
          typename Device,
          typename Index >
bool AdEllpack< Real, Device, Index >::createArrays( warpList< ThisType >* list )
bool AdEllpack< Real, Device, Index >::createArrays( warpList< AdEllpack >* list )
{
    IndexType length = list->getNumberOfWarps();

@@ -928,7 +906,7 @@ bool AdEllpack< Real, Device, Index >::createArrays( warpList< ThisType >* list
    this->reduceMap.setSize( length * this->warpSize );

    IndexType iteration = 0;
    warpInfo< ThisType >* warp = list->getHead()->next;
    warpInfo< AdEllpack >* warp = list->getHead()->next;
    while( warp != list->getTail() )
    {
        this->offset.setElement( iteration, warp->offset );
+4 −31
Original line number Diff line number Diff line
@@ -45,28 +45,6 @@ BiEllpack< Real, Device, Index >::BiEllpack()
  logWarpSize( 5 )
{}

// Builds the textual name of this matrix type in the form
// "Matrices::BiEllpack< <Real>, <Device>, <Index> >". The Real and Index names
// come from TNL::getType, the device name from Device::getDeviceType().
template< typename Real, typename Device, typename Index >
String
BiEllpack< Real, Device, Index >::getType()
{
    const String separator( ", " );
    const String realName( TNL::getType< Real >() );
    const String deviceName( Device::getDeviceType() );
    const String indexName( TNL::getType< Index >() );

    return String( "Matrices::BiEllpack< ")
           + realName + separator
           + deviceName + separator
           + indexName
           + String( " >" );
}

// Instance-level accessor for the type name; it only forwards to getType().
template< typename Real, typename Device, typename Index >
String
BiEllpack< Real, Device, Index >::getTypeVirtual() const
{
    return BiEllpack::getType();
}

template< typename Real,
	  typename Device,
	  typename Index >
@@ -715,18 +693,13 @@ BiEllpack< Real, Device, Index >::operator=( const BiEllpack< Real2, Device2, In
   this->virtualRows = matrix.virtualRows;
   this->rowPermArray = matrix.rowPermArray;
   this->groupPointers = matrix.groupPointers;
   
   if( std::is_same< Device, Devices::MIC >::value ) {
      throw std::runtime_error("Not Implemented yet for MIC");
   }
   
   return *this;
}

template< typename Real,
		  typename Device,
		  typename Index >
bool BiEllpack< Real, Device, Index >::save( File& file ) const
void BiEllpack< Real, Device, Index >::save( File& file ) const
{
   Sparse< Real, Device, Index >::save( file );
   file << this->groupPointers << this->rowPermArray;
@@ -735,7 +708,7 @@ bool BiEllpack< Real, Device, Index >::save( File& file ) const
template< typename Real,
		  typename Device,
		  typename Index >
bool BiEllpack< Real, Device, Index >::load( File& file )
void BiEllpack< Real, Device, Index >::load( File& file )
{
   Sparse< Real, Device, Index >::load( file );
   file >> this->groupPointers >> this->rowPermArray;
@@ -744,7 +717,7 @@ bool BiEllpack< Real, Device, Index >::load( File& file )
template< typename Real,
		  typename Device,
		  typename Index >
bool BiEllpack< Real, Device, Index >::save( const String& fileName ) const
void BiEllpack< Real, Device, Index >::save( const String& fileName ) const
{
   Object::save( fileName );
}
@@ -752,7 +725,7 @@ bool BiEllpack< Real, Device, Index >::save( const String& fileName ) const
template< typename Real,
		  typename Device,
		  typename Index >
bool BiEllpack< Real, Device, Index >::load( const String& fileName )
void BiEllpack< Real, Device, Index >::load( const String& fileName )
{
   Object::load( fileName );
}
+1 −3
Original line number Diff line number Diff line
@@ -45,9 +45,7 @@ String CSR< Real, Device, Index >::getSerializationType()
{
   return String( "Matrices::CSR< ") +
          TNL::getType< Real>() +
          String( ", " ) +
          String( Device :: getDeviceType() ) +
          String( ", " ) +
          ", [any_device], " +
          String( TNL::getType< Index >() ) +
          String( " >" );
}
Loading