Loading src/core/cuda/cuda-reduction_impl.h +51 −46 Original line number Diff line number Diff line Loading @@ -293,8 +293,10 @@ typename Operation :: IndexType reduceOnCudaDevice( const Operation& operation, IndexType alignedBlockSize = 1; while( alignedBlockSize < blockSize. x ) alignedBlockSize <<= 1; blockSize. x = alignedBlockSize; //const IndexType numberOfBlocks = tnlCuda::getNumberOfBlocks( size / 2, blockSize.x ); gridSize. x = Min( ( IndexType ) ( size / blockSize. x + 1 ) / 2, desGridSize ); //gridSize. x = Min( ( IndexType ) ( size / blockSize. x + 1 ) / 2, desGridSize ); gridSize. x = Min( tnlCuda::getNumberOfBlocks( size / 2, blockSize.x ), desGridSize ); if( ! output && ! tnlArrayOperations< tnlCuda >::allocateMemory( output, :: Max( ( IndexType ) 1, size / desBlockSize ) ) ) Loading Loading @@ -347,6 +349,7 @@ typename Operation :: IndexType reduceOnCudaDevice( const Operation& operation, default: tnlAssert( false, cerr << "Block size is " << blockSize. x << " which is none of 1, 2, 4, 8, 16, 32, 64, 128, 256 or 512." ); } checkCudaDevice; return gridSize. x; } #endif Loading Loading @@ -402,6 +405,8 @@ bool reductionOnCudaDevice( const Operation& operation, deviceAux1, ( ResultType* ) 0, deviceAux2 ); if( ! checkCudaDevice ) return false; Swap( deviceAux1, deviceAux2 ); } Loading @@ -428,7 +433,7 @@ bool reductionOnCudaDevice( const Operation& operation, return false; if( deviceAux2 && ! tnlArrayOperations< tnlCuda >::freeMemory( deviceAux2 ) ) return false; return true; return checkCudaDevice; #else tnlCudaSupportMissingMessage;; return false; Loading src/core/cuda/reduction-operations.h +1 −1 Original line number Diff line number Diff line Loading @@ -954,7 +954,7 @@ class tnlParallelReductionScalarProduct const RealType* data3 ) const { return data1[ idx1 ] + ( data2[ idx2 ] * data2[ idx2] ) + ( data2[ idx2 ] * data3[ idx2] ) + ( data2[ idx3 ] * data3[ idx3] ); }; Loading src/matrices/tnlEllpackMatrix_impl.h +2 −0 Original line number Diff line number Diff line Loading @@ -460,6 +460,7 @@ typename tnlEllpackMatrix< Real, Device, Index >::MatrixRow tnlEllpackMatrix< Real, Device, Index >:: getRow( const IndexType rowIndex ) { //printf( "this->rowLengths = %d this = %p \n", this->rowLengths, this ); IndexType rowBegin = DeviceDependentCode::getRowBegin( *this, rowIndex ); return MatrixRow( &this->columnIndexes[ rowBegin ], &this->values[ rowBegin ], Loading @@ -477,6 +478,7 @@ const typename tnlEllpackMatrix< Real, Device, Index >::MatrixRow tnlEllpackMatrix< Real, Device, Index >:: getRow( const IndexType rowIndex ) const { //printf( "this->rowLengths = %d this = %p \n", this->rowLengths, this ); IndexType rowBegin = DeviceDependentCode::getRowBegin( *this, rowIndex ); return MatrixRow( &this->columnIndexes[ rowBegin ], &this->values[ rowBegin ], Loading src/matrices/tnlMatrix_impl.h +2 −1 Original line number Diff line number Diff line Loading @@ -266,6 +266,7 @@ void tnlMatrixVectorProductCuda( const Matrix& matrix, kernel_inVector, kernel_outVector, gridIdx ); checkCudaDevice; } tnlCuda::freeFromDevice( kernel_this ); tnlCuda::freeFromDevice( kernel_inVector ); Loading src/matrices/tnlSparseMatrixRow_impl.h +1 −0 Original line number Diff line number Diff line Loading @@ -77,6 +77,7 @@ setElement( const Index& elementIndex, tnlAssert( this->columns, ); tnlAssert( this->values, ); tnlAssert( this->step > 0,); //printf( "elementIndex = %d length = %d \n", elementIndex, this->length ); tnlAssert( elementIndex >= 0 && elementIndex < this->length, cerr << "elementIndex = " << elementIndex << " this->length = " << this->length ); Loading Loading
src/core/cuda/cuda-reduction_impl.h +51 −46 Original line number Diff line number Diff line Loading @@ -293,8 +293,10 @@ typename Operation :: IndexType reduceOnCudaDevice( const Operation& operation, IndexType alignedBlockSize = 1; while( alignedBlockSize < blockSize. x ) alignedBlockSize <<= 1; blockSize. x = alignedBlockSize; //const IndexType numberOfBlocks = tnlCuda::getNumberOfBlocks( size / 2, blockSize.x ); gridSize. x = Min( ( IndexType ) ( size / blockSize. x + 1 ) / 2, desGridSize ); //gridSize. x = Min( ( IndexType ) ( size / blockSize. x + 1 ) / 2, desGridSize ); gridSize. x = Min( tnlCuda::getNumberOfBlocks( size / 2, blockSize.x ), desGridSize ); if( ! output && ! tnlArrayOperations< tnlCuda >::allocateMemory( output, :: Max( ( IndexType ) 1, size / desBlockSize ) ) ) Loading Loading @@ -347,6 +349,7 @@ typename Operation :: IndexType reduceOnCudaDevice( const Operation& operation, default: tnlAssert( false, cerr << "Block size is " << blockSize. x << " which is none of 1, 2, 4, 8, 16, 32, 64, 128, 256 or 512." ); } checkCudaDevice; return gridSize. x; } #endif Loading Loading @@ -402,6 +405,8 @@ bool reductionOnCudaDevice( const Operation& operation, deviceAux1, ( ResultType* ) 0, deviceAux2 ); if( ! checkCudaDevice ) return false; Swap( deviceAux1, deviceAux2 ); } Loading @@ -428,7 +433,7 @@ bool reductionOnCudaDevice( const Operation& operation, return false; if( deviceAux2 && ! tnlArrayOperations< tnlCuda >::freeMemory( deviceAux2 ) ) return false; return true; return checkCudaDevice; #else tnlCudaSupportMissingMessage;; return false; Loading
src/core/cuda/reduction-operations.h +1 −1 Original line number Diff line number Diff line Loading @@ -954,7 +954,7 @@ class tnlParallelReductionScalarProduct const RealType* data3 ) const { return data1[ idx1 ] + ( data2[ idx2 ] * data2[ idx2] ) + ( data2[ idx2 ] * data3[ idx2] ) + ( data2[ idx3 ] * data3[ idx3] ); }; Loading
src/matrices/tnlEllpackMatrix_impl.h +2 −0 Original line number Diff line number Diff line Loading @@ -460,6 +460,7 @@ typename tnlEllpackMatrix< Real, Device, Index >::MatrixRow tnlEllpackMatrix< Real, Device, Index >:: getRow( const IndexType rowIndex ) { //printf( "this->rowLengths = %d this = %p \n", this->rowLengths, this ); IndexType rowBegin = DeviceDependentCode::getRowBegin( *this, rowIndex ); return MatrixRow( &this->columnIndexes[ rowBegin ], &this->values[ rowBegin ], Loading @@ -477,6 +478,7 @@ const typename tnlEllpackMatrix< Real, Device, Index >::MatrixRow tnlEllpackMatrix< Real, Device, Index >:: getRow( const IndexType rowIndex ) const { //printf( "this->rowLengths = %d this = %p \n", this->rowLengths, this ); IndexType rowBegin = DeviceDependentCode::getRowBegin( *this, rowIndex ); return MatrixRow( &this->columnIndexes[ rowBegin ], &this->values[ rowBegin ], Loading
src/matrices/tnlMatrix_impl.h +2 −1 Original line number Diff line number Diff line Loading @@ -266,6 +266,7 @@ void tnlMatrixVectorProductCuda( const Matrix& matrix, kernel_inVector, kernel_outVector, gridIdx ); checkCudaDevice; } tnlCuda::freeFromDevice( kernel_this ); tnlCuda::freeFromDevice( kernel_inVector ); Loading
src/matrices/tnlSparseMatrixRow_impl.h +1 −0 Original line number Diff line number Diff line Loading @@ -77,6 +77,7 @@ setElement( const Index& elementIndex, tnlAssert( this->columns, ); tnlAssert( this->values, ); tnlAssert( this->step > 0,); //printf( "elementIndex = %d length = %d \n", elementIndex, this->length ); tnlAssert( elementIndex >= 0 && elementIndex < this->length, cerr << "elementIndex = " << elementIndex << " this->length = " << this->length ); Loading