Commit 5d4a1869 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Adding OpenMP to vector operations.

parent 31ee230c
Loading
Loading
Loading
Loading
+63 −26
Original line number Diff line number Diff line
@@ -99,10 +99,13 @@ getVectorL1Norm( const Vector& v )
   typedef typename Vector :: IndexType Index;
   tnlAssert( v. getSize() > 0, );

   Real result = fabs( v. getElement( 0 ) );
   Real result( 0.0 );
   const Index n = v. getSize();
   for( Index i = 1; i < n; i ++ )
      result += fabs( v. getElement( i ) );
#ifdef HAVE_OPENMP
#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
#endif           
   for( Index i = 0; i < n; i ++ )
      result += fabs( v[ i ] );
   return result;
}

@@ -114,12 +117,14 @@ getVectorL2Norm( const Vector& v )
   typedef typename Vector :: RealType Real;
   typedef typename Vector :: IndexType Index;
   tnlAssert( v. getSize() > 0, );
   Real result = v. getElement( 0 );
   result *= result;
   Real result( 0.0 );
   const Index n = v. getSize();
   for( Index i = 1; i < n; i ++ )
#ifdef HAVE_OPENMP
#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
#endif           
   for( Index i = 0; i < n; i ++ )
   {
      const Real aux = v. getElement( i );
      const Real& aux = v[ i ];
      result += aux * aux;
   }
   return sqrt( result );
@@ -141,10 +146,13 @@ getVectorLpNorm( const Vector& v,
   if( p == 2.0 )
      return getVectorL2Norm( v );

   Real result = pow( fabs( v. getElement( 0 ) ), p );
   Real result( 0.0 );
   const Index n = v. getSize();
   for( Index i = 1; i < n; i ++ )
      result += pow( fabs( v. getElement( i ) ), p );
#ifdef HAVE_OPENMP
#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
#endif           
   for( Index i = 0; i < n; i ++ )
      result += pow( fabs( v[ i ] ), p );
   return pow( result, 1.0 / p );
}

@@ -155,10 +163,13 @@ typename Vector :: RealType tnlVectorOperations< tnlHost > :: getVectorSum( cons
   typedef typename Vector :: IndexType Index;
   tnlAssert( v. getSize() > 0, );

   Real result = v. getElement( 0 );
   Real result( 0.0 );
   const Index n = v. getSize();
   for( Index i = 1; i < n; i ++ )
      result += v. getElement( i );
#ifdef HAVE_OPENMP
#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
#endif        
   for( Index i = 0; i < n; i ++ )
      result += v[ i ];
   return result;
}

@@ -221,10 +232,10 @@ typename Vector1 :: RealType tnlVectorOperations< tnlHost > :: getVectorDifferen
   tnlAssert( v1. getSize() > 0, );
   tnlAssert( v1. getSize() == v2. getSize(), );

   Real result = fabs( v1. getElement( 0 ) - v2. getElement( 0 ) );
   Real result = fabs( v1[ 0 ] - v2[ 0 ] );
   const Index n = v1. getSize();
   for( Index i = 1; i < n; i ++ )
      result =  Min( result, ( Real ) fabs( v1. getElement( i ) - v2. getElement( i ) ) );
      result =  Min( result, ( Real ) fabs( v1[ i ] - v2[ i ] ) );
   return result;
}

@@ -240,10 +251,13 @@ getVectorDifferenceL1Norm( const Vector1& v1,
   tnlAssert( v1. getSize() > 0, );
   tnlAssert( v1. getSize() == v2. getSize(), );

   Real result = fabs( v1. getElement( 0 ) - v2. getElement( 0 ) );
   Real result( 0.0 );
   const Index n = v1. getSize();
   for( Index i = 1; i < n; i ++ )
      result += fabs( v1. getElement( i ) - v2. getElement( i ) );
#ifdef HAVE_OPENMP
#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
#endif        
   for( Index i = 0; i < n; i ++ )
      result += fabs( v1[ i ] - v2[ i ] );
   return result;
}

@@ -259,12 +273,14 @@ getVectorDifferenceL2Norm( const Vector1& v1,
   tnlAssert( v1. getSize() > 0, );
   tnlAssert( v1. getSize() == v2. getSize(), );

   Real result = fabs( v1. getElement( 0 ) - v2. getElement( 0 ) );
   result *= result;
   Real result( 0.0 );
   const Index n = v1. getSize();
   for( Index i = 1; i < n; i ++ )
#ifdef HAVE_OPENMP
#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
#endif        
   for( Index i = 0; i < n; i ++ )
   {
      Real aux = fabs( v1. getElement( i ) - v2. getElement( i ) );
      Real aux = fabs( v1[ i ] - v2[ i ] );
      result += aux * aux;
   }
   return sqrt( result );
@@ -291,9 +307,12 @@ getVectorDifferenceLpNorm( const Vector1& v1,
   if( p == 2.0 )
      return getVectorDifferenceL2Norm( v1, v2 );

   Real result = pow( fabs( v1. getElement( 0 ) - v2. getElement( 0 ) ), p );
   Real result( 0.0 );
   const Index n = v1. getSize();
   for( Index i = 1; i < n; i ++ )
#ifdef HAVE_OPENMP
#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
#endif        
   for( Index i = 0; i < n; i ++ )
      result += pow( fabs( v1. getElement( i ) - v2. getElement( i ) ), p );
   return pow( result, 1.0 / p );
}
@@ -308,9 +327,12 @@ typename Vector1::RealType tnlVectorOperations< tnlHost > :: getVectorDifference
   tnlAssert( v1. getSize() > 0, );
   tnlAssert( v1. getSize() == v2. getSize(), );

   Real result = v1. getElement( 0 ) - v2. getElement( 0 );
   Real result( 0.0 );
   const Index n = v1. getSize();
   for( Index i = 1; i < n; i ++ )
#ifdef HAVE_OPENMP
#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
#endif        
   for( Index i = 0; i < n; i ++ )
      result += v1. getElement( i ) - v2. getElement( i );
   return result;
}
@@ -326,6 +348,9 @@ void tnlVectorOperations< tnlHost > :: vectorScalarMultiplication( Vector& v,
   tnlAssert( v. getSize() > 0, );

   const Index n = v. getSize();
#ifdef HAVE_OPENMP
#pragma omp parallel for if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
#endif        
   for( Index i = 0; i < n; i ++ )
      v[ i ] *= alpha;
}
@@ -365,9 +390,15 @@ void tnlVectorOperations< tnlHost > :: addVector( Vector1& y,

   const Index n = y. getSize();
   if( thisMultiplicator == 1.0 )
#ifdef HAVE_OPENMP
#pragma omp parallel for if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
#endif           
      for( Index i = 0; i < n; i ++ )
         y[ i ] += alpha * x[ i ];
   else
#ifdef HAVE_OPENMP
#pragma omp parallel for if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
#endif           
      for( Index i = 0; i < n; i ++ )
         y[ i ] = thisMultiplicator * y[ i ] + alpha * x[ i ];
}
@@ -393,9 +424,15 @@ addVectors( Vector1& v,
   
   const Index n = v.getSize();
   if( thisMultiplicator == 1.0 )
#ifdef HAVE_OPENMP
#pragma omp parallel for if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
#endif           
      for( Index i = 0; i < n; i ++ )
         v[ i ] += multiplicator1 * v1[ i ] + multiplicator2 * v2[ i ];
   else
#ifdef HAVE_OPENMP
#pragma omp parallel for if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
#endif           
      for( Index i = 0; i < n; i ++ )
         v[ i ] = thisMultiplicator * v[ i ] * multiplicator1 * v1[ i ] + multiplicator2 * v2[ i ];
}