Fixing scalar product and Lp norm in CUDA. (9b588df4) · Commits · TNL / tnl-dev

src/core/cuda/cuda-reduction_impl.h

+51 −46

Original line number	Diff line number	Diff line
		@@ -293,8 +293,10 @@ typename Operation :: IndexType reduceOnCudaDevice( const Operation& operation,
		IndexType alignedBlockSize = 1;
		while( alignedBlockSize < blockSize. x ) alignedBlockSize <<= 1;
		blockSize. x = alignedBlockSize;
		//const IndexType numberOfBlocks = tnlCuda::getNumberOfBlocks( size / 2, blockSize.x );

		gridSize. x = Min( ( IndexType ) ( size / blockSize. x + 1 ) / 2, desGridSize );
		//gridSize. x = Min( ( IndexType ) ( size / blockSize. x + 1 ) / 2, desGridSize );
		gridSize. x = Min( tnlCuda::getNumberOfBlocks( size / 2, blockSize.x ), desGridSize );

		if( ! output &&
		! tnlArrayOperations< tnlCuda >::allocateMemory( output, :: Max( ( IndexType ) 1, size / desBlockSize ) ) )
		@@ -347,6 +349,7 @@ typename Operation :: IndexType reduceOnCudaDevice( const Operation& operation,
		default:
		tnlAssert( false, cerr << "Block size is " << blockSize. x << " which is none of 1, 2, 4, 8, 16, 32, 64, 128, 256 or 512." );
		}
		checkCudaDevice;
		return gridSize. x;
		}
		#endif
		@@ -402,6 +405,8 @@ bool reductionOnCudaDevice( const Operation& operation,
		deviceAux1,
		( ResultType* ) 0,
		deviceAux2 );
		if( ! checkCudaDevice )
		return false;
		Swap( deviceAux1, deviceAux2 );
		}

		@@ -428,7 +433,7 @@ bool reductionOnCudaDevice( const Operation& operation,
		return false;
		if( deviceAux2 && ! tnlArrayOperations< tnlCuda >::freeMemory( deviceAux2 ) )
		return false;
		return true;
		return checkCudaDevice;
		#else
		tnlCudaSupportMissingMessage;;
		return false;

+1 −1

Original line number	Diff line number	Diff line
		@@ -954,7 +954,7 @@ class tnlParallelReductionScalarProduct
		const RealType* data3 ) const
		{
		return data1[ idx1 ] +
		( data2[ idx2 ] * data2[ idx2] ) +
		( data2[ idx2 ] * data3[ idx2] ) +
		( data2[ idx3 ] * data3[ idx3] );
		};

+2 −0

Original line number	Diff line number	Diff line
		@@ -460,6 +460,7 @@ typename tnlEllpackMatrix< Real, Device, Index >::MatrixRow
		tnlEllpackMatrix< Real, Device, Index >::
		getRow( const IndexType rowIndex )
		{
		//printf( "this->rowLengths = %d this = %p \n", this->rowLengths, this );
		IndexType rowBegin = DeviceDependentCode::getRowBegin( *this, rowIndex );
		return MatrixRow( &this->columnIndexes[ rowBegin ],
		&this->values[ rowBegin ],
		@@ -477,6 +478,7 @@ const typename tnlEllpackMatrix< Real, Device, Index >::MatrixRow
		tnlEllpackMatrix< Real, Device, Index >::
		getRow( const IndexType rowIndex ) const
		{
		//printf( "this->rowLengths = %d this = %p \n", this->rowLengths, this );
		IndexType rowBegin = DeviceDependentCode::getRowBegin( *this, rowIndex );
		return MatrixRow( &this->columnIndexes[ rowBegin ],
		&this->values[ rowBegin ],

+2 −1

Original line number	Diff line number	Diff line
		@@ -266,6 +266,7 @@ void tnlMatrixVectorProductCuda( const Matrix& matrix,
		kernel_inVector,
		kernel_outVector,
		gridIdx );
		checkCudaDevice;
		}
		tnlCuda::freeFromDevice( kernel_this );
		tnlCuda::freeFromDevice( kernel_inVector );

+1 −0

Original line number	Diff line number	Diff line
		@@ -77,6 +77,7 @@ setElement( const Index& elementIndex,
		tnlAssert( this->columns, );
		tnlAssert( this->values, );
		tnlAssert( this->step > 0,);
		//printf( "elementIndex = %d length = %d \n", elementIndex, this->length );
		tnlAssert( elementIndex >= 0 && elementIndex < this->length,
		cerr << "elementIndex = " << elementIndex << " this->length = " << this->length );