Fixes after rebase. (2b9409c8) · Commits · TNL / tnl-dev

src/Benchmarks/SpMV/spmv.h

+19 −19

Original line number	Diff line number	Diff line
		@@ -100,11 +100,11 @@ benchmarkSpMV( Benchmark & benchmark,
		return false;
		}

		#ifdef HAVE_CUDA
		// cuSPARSE handle setup
		cusparseHandle_t cusparseHandle;
		cusparseCreate( &cusparseHandle );

		#ifdef HAVE_CUDA
		// cuSPARSE (in TNL's CSR) only works for device, copy the matrix from host to device
		CSRdeviceMatrix = CSRhostMatrix;

		@@ -185,9 +185,11 @@ benchmarkSpMV( Benchmark & benchmark,
		auto spmvCuda = [&]() {
		deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
		};
		#ifdef HAVE_CUDA
		auto spmvCusparse = [&]() {
		cusparseCSR.vectorProduct( deviceVector, deviceVector2 );
		};
		#endif

		benchmark.setOperation( datasetSize );
		benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
		@@ -201,18 +203,6 @@ benchmarkSpMV( Benchmark & benchmark,
		// Copy the values
		resultHostVector2 = hostVector2;

		#ifdef HAVE_CUDA
		benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );

		// Initialize the device vector to be compared.
		// (The values in deviceVector2 will be reset when spmvCusparse starts)
		HostVector resultDeviceVector2;
		resultDeviceVector2.setSize( deviceVector2.getSize() );
		resultDeviceVector2.setValue( 0.0 );

		resultDeviceVector2 = deviceVector2;
		#endif

		// Setup cuSPARSE MetaData, since it has the same header as CSR,
		// and therefore will not get its own headers (rows, cols, speedup etc.) in log.
		// * Not setting this up causes (among other undiscovered errors) the speedup from CPU to GPU on the input format to be overwritten.
		@@ -225,6 +215,16 @@ benchmarkSpMV( Benchmark & benchmark,
		} ));

		#ifdef HAVE_CUDA
		benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );

		// Initialize the device vector to be compared.
		// (The values in deviceVector2 will be reset when spmvCusparse starts)
		HostVector resultDeviceVector2;
		resultDeviceVector2.setSize( deviceVector2.getSize() );
		resultDeviceVector2.setValue( 0.0 );

		resultDeviceVector2 = deviceVector2;

		benchmark.time< Devices::Cuda >( reset, "GPU", spmvCusparse );

		HostVector resultcuSPARSEDeviceVector2;
		@@ -232,7 +232,6 @@ benchmarkSpMV( Benchmark & benchmark,
		resultcuSPARSEDeviceVector2.setValue( 0.0 );

		resultcuSPARSEDeviceVector2 = deviceVector2;
		#endif

		// Difference between GPU (current format) and GPU-cuSPARSE results
		Real cuSparseDifferenceAbsMax = resultDeviceVector2.differenceAbsMax( resultcuSPARSEDeviceVector2 );
		@@ -243,6 +242,7 @@ benchmarkSpMV( Benchmark & benchmark,

		char *GPUcuSparse_absMax = &GPUxGPUcuSparse_resultDifferenceAbsMax[ 0u ];
		char *GPUcuSparse_lpNorm = &GPUxGPUcuSparse_resultDifferenceLpNorm[ 0u ];
		#endif


		// Difference between CPU and GPU results for the current format
		@@ -277,7 +277,7 @@ benchmarkSpmvSynthetic( Benchmark & benchmark,
		bool result = true;
		result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR );
		result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR );
		result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName, verboseMR );
		result |= benchmarkSpMV< Real, Matrices::SlicedEllpack >( benchmark, inputFileName, verboseMR );
		result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR );

		// AdEllpack is broken

src/TNL/Matrices/AdEllpack.h

+4 −4

Original line number	Diff line number	Diff line
		@@ -195,7 +195,7 @@ public:
		typename = typename Enabler< Device2 >::type >
		AdEllpack& operator=( const AdEllpack< Real2, Device2, Index2 >& matrix );

		bool save( File& file ) const;
		void save( File& file ) const;

		void load( File& file );

		@@ -207,13 +207,13 @@ public:

		bool balanceLoad( const RealType average,
		ConstCompressedRowLengthsVectorView rowLengths,
		warpList< ThisType >* list );
		warpList< AdEllpack >* list );

		void computeWarps( const IndexType SMs,
		const IndexType threadsPerSM,
		warpList< ThisType >* list );
		warpList< AdEllpack >* list );

		bool createArrays( warpList< ThisType >* list );
		bool createArrays( warpList< AdEllpack >* list );

		void performRowTest();

src/TNL/Matrices/AdEllpack_impl.h

+6 −28

Original line number	Diff line number	Diff line
		@@ -162,28 +162,6 @@ AdEllpack< Real, Device, Index >::AdEllpack()
		warpSize( 32 )
		{}

		template< typename Real,
		typename Device,
		typename Index >
		String AdEllpack< Real, Device, Index >::getTypeVirtual() const
		{
		return this->getType();
		}

		template< typename Real,
		typename Device,
		typename Index >
		String AdEllpack< Real, Device, Index >::getType()
		{
		return String( "Matrices::AdEllpack< ") +
		String( TNL::getType< Real >() ) +
		String( ", " ) +
		String( Device::getDeviceType() ) +
		String( ", " ) +
		String( TNL::getType< Index >() ) +
		String( " >" );
		}

		template< typename Real,
		typename Device,
		typename Index >
		@@ -204,7 +182,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
		average /= ( RealType ) this->getRows();
		this->totalLoad = average;

		warpList< ThisType >* list = new warpList< ThisType >();
		warpList< AdEllpack >* list = new warpList< AdEllpack >();

		if( !this->balanceLoad( average, rowLengths, list ) )
		throw 0; // TODO: Make better exception
		@@ -766,7 +744,7 @@ template< typename Real,
		typename Index >
		bool AdEllpack< Real, Device, Index >::balanceLoad( const RealType average,
		ConstCompressedRowLengthsVectorView rowLengths,
		warpList< ThisType >* list )
		warpList< AdEllpack >* list )
		{
		IndexType offset, rowOffset, localLoad, reduceMap[ 32 ];

		@@ -882,10 +860,10 @@ template< typename Real,
		typename Index >
		void AdEllpack< Real, Device, Index >::computeWarps( const IndexType SMs,
		const IndexType threadsPerSM,
		warpList< ThisType >* list )
		warpList< AdEllpack >* list )
		{
		IndexType averageLoad = 0;
		warpInfo< ThisType >* temp = list->getHead()->next;
		warpInfo< AdEllpack >* temp = list->getHead()->next;

		while( temp/*->next*/ != list->getTail() )
		{
		@@ -918,7 +896,7 @@ void AdEllpack< Real, Device, Index >::computeWarps( const IndexType SMs,
		template< typename Real,
		typename Device,
		typename Index >
		bool AdEllpack< Real, Device, Index >::createArrays( warpList< ThisType >* list )
		bool AdEllpack< Real, Device, Index >::createArrays( warpList< AdEllpack >* list )
		{
		IndexType length = list->getNumberOfWarps();

		@@ -928,7 +906,7 @@ bool AdEllpack< Real, Device, Index >::createArrays( warpList< ThisType >* list
		this->reduceMap.setSize( length * this->warpSize );

		IndexType iteration = 0;
		warpInfo< ThisType >* warp = list->getHead()->next;
		warpInfo< AdEllpack >* warp = list->getHead()->next;
		while( warp != list->getTail() )
		{
		this->offset.setElement( iteration, warp->offset );

src/TNL/Matrices/BiEllpack_impl.h

+4 −31

Original line number	Diff line number	Diff line
		@@ -45,28 +45,6 @@ BiEllpack< Real, Device, Index >::BiEllpack()
		logWarpSize( 5 )
		{}

		template< typename Real,
		typename Device,
		typename Index >
		String BiEllpack< Real, Device, Index >::getType()
		{
		return String( "Matrices::BiEllpack< ") +
		String( TNL::getType< Real >() ) +
		String( ", " ) +
		String( Device :: getDeviceType() ) +
		String( ", " ) +
		String( TNL::getType< Index >() ) +
		String( " >" );
		}

		template< typename Real,
		typename Device,
		typename Index >
		String BiEllpack< Real, Device, Index >::getTypeVirtual() const
		{
		return this->getType();
		}

		template< typename Real,
		typename Device,
		typename Index >
		@@ -715,18 +693,13 @@ BiEllpack< Real, Device, Index >::operator=( const BiEllpack< Real2, Device2, In
		this->virtualRows = matrix.virtualRows;
		this->rowPermArray = matrix.rowPermArray;
		this->groupPointers = matrix.groupPointers;

		if( std::is_same< Device, Devices::MIC >::value ) {
		throw std::runtime_error("Not Implemented yet for MIC");
		}

		return *this;
		}

		template< typename Real,
		typename Device,
		typename Index >
		bool BiEllpack< Real, Device, Index >::save( File& file ) const
		void BiEllpack< Real, Device, Index >::save( File& file ) const
		{
		Sparse< Real, Device, Index >::save( file );
		file << this->groupPointers << this->rowPermArray;
		@@ -735,7 +708,7 @@ bool BiEllpack< Real, Device, Index >::save( File& file ) const
		template< typename Real,
		typename Device,
		typename Index >
		bool BiEllpack< Real, Device, Index >::load( File& file )
		void BiEllpack< Real, Device, Index >::load( File& file )
		{
		Sparse< Real, Device, Index >::load( file );
		file >> this->groupPointers >> this->rowPermArray;
		@@ -744,7 +717,7 @@ bool BiEllpack< Real, Device, Index >::load( File& file )
		template< typename Real,
		typename Device,
		typename Index >
		bool BiEllpack< Real, Device, Index >::save( const String& fileName ) const
		void BiEllpack< Real, Device, Index >::save( const String& fileName ) const
		{
		Object::save( fileName );
		}
		@@ -752,7 +725,7 @@ bool BiEllpack< Real, Device, Index >::save( const String& fileName ) const
		template< typename Real,
		typename Device,
		typename Index >
		bool BiEllpack< Real, Device, Index >::load( const String& fileName )
		void BiEllpack< Real, Device, Index >::load( const String& fileName )
		{
		Object::load( fileName );
		}

src/TNL/Matrices/CSR_impl.h

+1 −3

Original line number	Diff line number	Diff line
		@@ -45,9 +45,7 @@ String CSR< Real, Device, Index >::getSerializationType()
		{
		return String( "Matrices::CSR< ") +
		TNL::getType< Real>() +
		String( ", " ) +
		String( Device :: getDeviceType() ) +
		String( ", " ) +
		", [any_device], " +
		String( TNL::getType< Index >() ) +
		String( " >" );
		}