Fixing bug in tnlVector::operator !=. (c2f4c4ab) · Commits · TNL / tnl-dev

src/core/vectors/tnlSharedVector_impl.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -142,7 +142,7 @@ template< typename Real,
		template< typename Vector >
		bool tnlSharedVector< Real, Device, Index > :: operator != ( const Vector& vector ) const
		{
		- return tnlSharedArray< Real, Device, Index > :: operator == ( vector );
		+ return tnlSharedArray< Real, Device, Index > :: operator != ( vector );
		}

		template< typename Real,

+1 −1

Original line number	Diff line number	Diff line
		@@ -127,7 +127,7 @@ template< typename Real,
		template< typename Vector >
		bool tnlVector< Real, Device, Index > :: operator != ( const Vector& vector ) const
		{
		- return tnlArray< Real, Device, Index > :: operator == ( vector );
		+ return tnlArray< Real, Device, Index > :: operator != ( vector );
		}

		template< typename Real,

+25 −0

Original line number	Diff line number	Diff line
		@@ -863,6 +863,31 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlCuda >
		OutVector& outVector )
		{
		tnlMatrixVectorProductCuda( matrix, inVector, outVector );
		+ /*#ifdef HAVE_CUDA
		+ typedef tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize > Matrix;
		+ typedef typename Matrix::IndexType IndexType;
		+ Matrix* kernel_this = tnlCuda::passToDevice( matrix );
		+ InVector* kernel_inVector = tnlCuda::passToDevice( inVector );
		+ OutVector* kernel_outVector = tnlCuda::passToDevice( outVector );
		+ dim3 cudaBlockSize( 256 ), cudaGridSize( tnlCuda::getMaxGridSize() );
		+ const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
		+ const IndexType cudaGrids = roundUpDivision( cudaBlocks, tnlCuda::getMaxGridSize() );
		+ for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
		+ {
		+ if( gridIdx == cudaGrids - 1 )
		+ cudaGridSize.x = cudaBlocks % tnlCuda::getMaxGridSize();
		+ tnlMatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>>
		+ ( kernel_this,
		+ kernel_inVector,
		+ kernel_outVector,
		+ gridIdx );
		+ checkCudaDevice;
		+ }
		+ tnlCuda::freeFromDevice( kernel_this );
		+ tnlCuda::freeFromDevice( kernel_inVector );
		+ tnlCuda::freeFromDevice( kernel_outVector );
		+ checkCudaDevice;
		+ #endif*/
		}

		};

+8 −0

Original line number	Diff line number	Diff line
		@@ -222,6 +222,8 @@ int main( int argc, char* argv[] )
		}
		}
		datasetSize = loops * elements * sizeof( double ) / oneGB;
		+ hostVector.setValue( 1.0 );
		+ deviceVector.setValue( 1.0 );
		cout << "Benchmarking SpMV on CPU: ";
		timer.reset();
		for( int i = 0; i < loops; i++ )
		@@ -236,6 +238,12 @@ int main( int argc, char* argv[] )
		for( int i = 0; i < loops; i++ )
		deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
		timer.stop();
		+ //cout << hostVector2 << endl << deviceVector2 << endl;
		+
		+ if( hostVector2 != deviceVector2 )
		+ {
		+ cerr << "Error in SliceEllpack Spmv kernel." << endl;
		+ }
		bandwidth = 2 * datasetSize / loops / timer.getTime();
		cout << timer.getTime() << " => " << bandwidth << " GB/s" << " speedup " << hostTime / timer.getTime() << endl;