From 5d4a186975bc9080a12e42432ee0ea743076eaa3 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Sun, 13 Dec 2015 14:28:05 +0100
Subject: [PATCH] Adding OpenMP to vector operations.

---
 .../vectors/tnlVectorOperationsHost_impl.h    | 89 +++++++++++++------
 1 file changed, 63 insertions(+), 26 deletions(-)

diff --git a/src/core/vectors/tnlVectorOperationsHost_impl.h b/src/core/vectors/tnlVectorOperationsHost_impl.h
index 67ed621ecf..c508ff9204 100644
--- a/src/core/vectors/tnlVectorOperationsHost_impl.h
+++ b/src/core/vectors/tnlVectorOperationsHost_impl.h
@@ -99,10 +99,13 @@ getVectorL1Norm( const Vector& v )
    typedef typename Vector :: IndexType Index;
    tnlAssert( v. getSize() > 0, );
 
-   Real result = fabs( v. getElement( 0 ) );
+   Real result( 0.0 );
    const Index n = v. getSize();
-   for( Index i = 1; i < n; i ++ )
-      result += fabs( v. getElement( i ) );
+#ifdef HAVE_OPENMP
+#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#endif           
+   for( Index i = 0; i < n; i ++ )
+      result += fabs( v[ i ] );
    return result;
 }
 
@@ -114,12 +117,14 @@ getVectorL2Norm( const Vector& v )
    typedef typename Vector :: RealType Real;
    typedef typename Vector :: IndexType Index;
    tnlAssert( v. getSize() > 0, );
-   Real result = v. getElement( 0 );
-   result *= result;
+   Real result( 0.0 );
    const Index n = v. getSize();
-   for( Index i = 1; i < n; i ++ )
+#ifdef HAVE_OPENMP
+#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#endif           
+   for( Index i = 0; i < n; i ++ )
    {
-      const Real aux = v. getElement( i );
+      const Real& aux = v[ i ];
       result += aux * aux;
    }
    return sqrt( result );
@@ -141,10 +146,13 @@ getVectorLpNorm( const Vector& v,
    if( p == 2.0 )
       return getVectorL2Norm( v );
 
-   Real result = pow( fabs( v. getElement( 0 ) ), p );
+   Real result( 0.0 );
    const Index n = v. getSize();
-   for( Index i = 1; i < n; i ++ )
-      result += pow( fabs( v. getElement( i ) ), p );
+#ifdef HAVE_OPENMP
+#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#endif           
+   for( Index i = 0; i < n; i ++ )
+      result += pow( fabs( v[ i ] ), p );
    return pow( result, 1.0 / p );
 }
 
@@ -155,10 +163,13 @@ typename Vector :: RealType tnlVectorOperations< tnlHost > :: getVectorSum( cons
    typedef typename Vector :: IndexType Index;
    tnlAssert( v. getSize() > 0, );
 
-   Real result = v. getElement( 0 );
+   Real result( 0.0 );
    const Index n = v. getSize();
-   for( Index i = 1; i < n; i ++ )
-      result += v. getElement( i );
+#ifdef HAVE_OPENMP
+#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#endif        
+   for( Index i = 0; i < n; i ++ )
+      result += v[ i ];
    return result;
 }
 
@@ -221,10 +232,10 @@ typename Vector1 :: RealType tnlVectorOperations< tnlHost > :: getVectorDifferen
    tnlAssert( v1. getSize() > 0, );
    tnlAssert( v1. getSize() == v2. getSize(), );
 
-   Real result = fabs( v1. getElement( 0 ) - v2. getElement( 0 ) );
+   Real result = fabs( v1[ 0 ] - v2[ 0 ] );
    const Index n = v1. getSize();
    for( Index i = 1; i < n; i ++ )
-      result =  Min( result, ( Real ) fabs( v1. getElement( i ) - v2. getElement( i ) ) );
+      result =  Min( result, ( Real ) fabs( v1[ i ] - v2[ i ] ) );
    return result;
 }
 
@@ -240,10 +251,13 @@ getVectorDifferenceL1Norm( const Vector1& v1,
    tnlAssert( v1. getSize() > 0, );
    tnlAssert( v1. getSize() == v2. getSize(), );
 
-   Real result = fabs( v1. getElement( 0 ) - v2. getElement( 0 ) );
+   Real result( 0.0 );
    const Index n = v1. getSize();
-   for( Index i = 1; i < n; i ++ )
-      result += fabs( v1. getElement( i ) - v2. getElement( i ) );
+#ifdef HAVE_OPENMP
+#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#endif        
+   for( Index i = 0; i < n; i ++ )
+      result += fabs( v1[ i ] - v2[ i ] );
    return result;
 }
 
@@ -259,12 +273,14 @@ getVectorDifferenceL2Norm( const Vector1& v1,
    tnlAssert( v1. getSize() > 0, );
    tnlAssert( v1. getSize() == v2. getSize(), );
 
-   Real result = fabs( v1. getElement( 0 ) - v2. getElement( 0 ) );
-   result *= result;
+   Real result( 0.0 );
    const Index n = v1. getSize();
-   for( Index i = 1; i < n; i ++ )
+#ifdef HAVE_OPENMP
+#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#endif        
+   for( Index i = 0; i < n; i ++ )
    {
-      Real aux = fabs( v1. getElement( i ) - v2. getElement( i ) );
+      Real aux = fabs( v1[ i ] - v2[ i ] );
       result += aux * aux;
    }
    return sqrt( result );
@@ -291,9 +307,12 @@ getVectorDifferenceLpNorm( const Vector1& v1,
    if( p == 2.0 )
       return getVectorDifferenceL2Norm( v1, v2 );
 
-   Real result = pow( fabs( v1. getElement( 0 ) - v2. getElement( 0 ) ), p );
+   Real result( 0.0 );
    const Index n = v1. getSize();
-   for( Index i = 1; i < n; i ++ )
+#ifdef HAVE_OPENMP
+#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#endif        
+   for( Index i = 0; i < n; i ++ )
       result += pow( fabs( v1. getElement( i ) - v2. getElement( i ) ), p );
    return pow( result, 1.0 / p );
 }
@@ -308,9 +327,12 @@ typename Vector1::RealType tnlVectorOperations< tnlHost > :: getVectorDifference
    tnlAssert( v1. getSize() > 0, );
    tnlAssert( v1. getSize() == v2. getSize(), );
 
-   Real result = v1. getElement( 0 ) - v2. getElement( 0 );
+   Real result( 0.0 );
    const Index n = v1. getSize();
-   for( Index i = 1; i < n; i ++ )
+#ifdef HAVE_OPENMP
+#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#endif        
+   for( Index i = 0; i < n; i ++ )
       result += v1. getElement( i ) - v2. getElement( i );
    return result;
 }
@@ -326,6 +348,9 @@ void tnlVectorOperations< tnlHost > :: vectorScalarMultiplication( Vector& v,
    tnlAssert( v. getSize() > 0, );
 
    const Index n = v. getSize();
+#ifdef HAVE_OPENMP
+#pragma omp parallel for if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#endif        
    for( Index i = 0; i < n; i ++ )
       v[ i ] *= alpha;
 }
@@ -365,9 +390,15 @@ void tnlVectorOperations< tnlHost > :: addVector( Vector1& y,
 
    const Index n = y. getSize();
    if( thisMultiplicator == 1.0 )
+#ifdef HAVE_OPENMP
+#pragma omp parallel for if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#endif           
       for( Index i = 0; i < n; i ++ )
          y[ i ] += alpha * x[ i ];
    else
+#ifdef HAVE_OPENMP
+#pragma omp parallel for if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#endif           
       for( Index i = 0; i < n; i ++ )
          y[ i ] = thisMultiplicator * y[ i ] + alpha * x[ i ];
 }
@@ -393,9 +424,15 @@ addVectors( Vector1& v,
    
    const Index n = v.getSize();
    if( thisMultiplicator == 1.0 )
+#ifdef HAVE_OPENMP
+#pragma omp parallel for if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#endif           
       for( Index i = 0; i < n; i ++ )
          v[ i ] += multiplicator1 * v1[ i ] + multiplicator2 * v2[ i ];
    else
+#ifdef HAVE_OPENMP
+#pragma omp parallel for if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#endif           
       for( Index i = 0; i < n; i ++ )
          v[ i ] = thisMultiplicator * v[ i ] * multiplicator1 * v1[ i ] + multiplicator2 * v2[ i ];
 }
-- 
GitLab