Commit 9b588df4 authored by Tomáš Oberhuber
Browse files

Fixing scalar product and Lp norm in CUDA.

parent 698dfcec
Loading
Loading
Loading
Loading
+51 −46
Original line number Diff line number Diff line
@@ -293,8 +293,10 @@ typename Operation :: IndexType reduceOnCudaDevice( const Operation& operation,
   IndexType alignedBlockSize = 1;
   while( alignedBlockSize < blockSize. x ) alignedBlockSize <<= 1;
   blockSize. x = alignedBlockSize;
   //const IndexType numberOfBlocks = tnlCuda::getNumberOfBlocks( size / 2, blockSize.x );
   
   gridSize. x = Min( ( IndexType ) ( size / blockSize. x + 1 ) / 2, desGridSize );
   //gridSize. x = Min( ( IndexType ) ( size / blockSize. x + 1 ) / 2, desGridSize );
   gridSize. x = Min( tnlCuda::getNumberOfBlocks( size / 2, blockSize.x ), desGridSize );

   if( ! output &&
       ! tnlArrayOperations< tnlCuda >::allocateMemory( output, :: Max( ( IndexType ) 1, size / desBlockSize ) ) )
@@ -347,6 +349,7 @@ typename Operation :: IndexType reduceOnCudaDevice( const Operation& operation,
      default:
         tnlAssert( false, cerr << "Block size is " << blockSize. x << " which is none of 1, 2, 4, 8, 16, 32, 64, 128, 256 or 512." );
   }
   checkCudaDevice;
   return gridSize. x;
}
#endif
@@ -402,6 +405,8 @@ bool reductionOnCudaDevice( const Operation& operation,
                                        deviceAux1,
                                        ( ResultType* ) 0,
                                        deviceAux2 );
      if( ! checkCudaDevice )
          return false;
      Swap( deviceAux1, deviceAux2 );
   }

@@ -428,7 +433,7 @@ bool reductionOnCudaDevice( const Operation& operation,
      return false;
   if( deviceAux2 && ! tnlArrayOperations< tnlCuda >::freeMemory( deviceAux2 ) )
      return false;
   return true;
   return checkCudaDevice;
#else
   tnlCudaSupportMissingMessage;;
   return false;
+1 −1
Original line number Diff line number Diff line
@@ -954,7 +954,7 @@ class tnlParallelReductionScalarProduct
                                                 const RealType* data3 ) const
   {
      return data1[ idx1 ] +
             ( data2[ idx2 ] * data2[ idx2] ) +
             ( data2[ idx2 ] * data3[ idx2] ) +
             ( data2[ idx3 ] * data3[ idx3] );
   };

+2 −0
Original line number Diff line number Diff line
@@ -460,6 +460,7 @@ typename tnlEllpackMatrix< Real, Device, Index >::MatrixRow
tnlEllpackMatrix< Real, Device, Index >::
getRow( const IndexType rowIndex )
{
   //printf( "this->rowLengths = %d this = %p \n", this->rowLengths, this );
   IndexType rowBegin = DeviceDependentCode::getRowBegin( *this, rowIndex );
   return MatrixRow( &this->columnIndexes[ rowBegin ],
                     &this->values[ rowBegin ],
@@ -477,6 +478,7 @@ const typename tnlEllpackMatrix< Real, Device, Index >::MatrixRow
tnlEllpackMatrix< Real, Device, Index >::
getRow( const IndexType rowIndex ) const
{
   //printf( "this->rowLengths = %d this = %p \n", this->rowLengths, this );
   IndexType rowBegin = DeviceDependentCode::getRowBegin( *this, rowIndex );
   return MatrixRow( &this->columnIndexes[ rowBegin ],
                     &this->values[ rowBegin ],
+2 −1
Original line number Diff line number Diff line
@@ -266,6 +266,7 @@ void tnlMatrixVectorProductCuda( const Matrix& matrix,
                                       kernel_inVector,
                                       kernel_outVector,
                                       gridIdx );
      checkCudaDevice;
   }
   tnlCuda::freeFromDevice( kernel_this );
   tnlCuda::freeFromDevice( kernel_inVector );
+1 −0
Original line number Diff line number Diff line
@@ -77,6 +77,7 @@ setElement( const Index& elementIndex,
   tnlAssert( this->columns, );
   tnlAssert( this->values, );
   tnlAssert( this->step > 0,);
   //printf( "elementIndex = %d length = %d \n", elementIndex, this->length );
   tnlAssert( elementIndex >= 0 && elementIndex < this->length,
              cerr << "elementIndex = " << elementIndex << " this->length = " << this->length );

Loading