From 5d4a186975bc9080a12e42432ee0ea743076eaa3 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz> Date: Sun, 13 Dec 2015 14:28:05 +0100 Subject: [PATCH] Adding OpenMP to vector operations. --- .../vectors/tnlVectorOperationsHost_impl.h | 89 +++++++++++++------ 1 file changed, 63 insertions(+), 26 deletions(-) diff --git a/src/core/vectors/tnlVectorOperationsHost_impl.h b/src/core/vectors/tnlVectorOperationsHost_impl.h index 67ed621ecf..c508ff9204 100644 --- a/src/core/vectors/tnlVectorOperationsHost_impl.h +++ b/src/core/vectors/tnlVectorOperationsHost_impl.h @@ -99,10 +99,13 @@ getVectorL1Norm( const Vector& v ) typedef typename Vector :: IndexType Index; tnlAssert( v. getSize() > 0, ); - Real result = fabs( v. getElement( 0 ) ); + Real result( 0.0 ); const Index n = v. getSize(); - for( Index i = 1; i < n; i ++ ) - result += fabs( v. getElement( i ) ); +#ifdef HAVE_OPENMP +#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#endif + for( Index i = 0; i < n; i ++ ) + result += fabs( v[ i ] ); return result; } @@ -114,12 +117,14 @@ getVectorL2Norm( const Vector& v ) typedef typename Vector :: RealType Real; typedef typename Vector :: IndexType Index; tnlAssert( v. getSize() > 0, ); - Real result = v. getElement( 0 ); - result *= result; + Real result( 0.0 ); const Index n = v. getSize(); - for( Index i = 1; i < n; i ++ ) +#ifdef HAVE_OPENMP +#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#endif + for( Index i = 0; i < n; i ++ ) { - const Real aux = v. getElement( i ); + const Real& aux = v[ i ]; result += aux * aux; } return sqrt( result ); @@ -141,10 +146,13 @@ getVectorLpNorm( const Vector& v, if( p == 2.0 ) return getVectorL2Norm( v ); - Real result = pow( fabs( v. getElement( 0 ) ), p ); + Real result( 0.0 ); const Index n = v. getSize(); - for( Index i = 1; i < n; i ++ ) - result += pow( fabs( v. getElement( i ) ), p ); +#ifdef HAVE_OPENMP +#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#endif + for( Index i = 0; i < n; i ++ ) + result += pow( fabs( v[ i ] ), p ); return pow( result, 1.0 / p ); } @@ -155,10 +163,13 @@ typename Vector :: RealType tnlVectorOperations< tnlHost > :: getVectorSum( cons typedef typename Vector :: IndexType Index; tnlAssert( v. getSize() > 0, ); - Real result = v. getElement( 0 ); + Real result( 0.0 ); const Index n = v. getSize(); - for( Index i = 1; i < n; i ++ ) - result += v. getElement( i ); +#ifdef HAVE_OPENMP +#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#endif + for( Index i = 0; i < n; i ++ ) + result += v[ i ]; return result; } @@ -221,10 +232,10 @@ typename Vector1 :: RealType tnlVectorOperations< tnlHost > :: getVectorDifferen tnlAssert( v1. getSize() > 0, ); tnlAssert( v1. getSize() == v2. getSize(), ); - Real result = fabs( v1. getElement( 0 ) - v2. getElement( 0 ) ); + Real result = fabs( v1[ 0 ] - v2[ 0 ] ); const Index n = v1. getSize(); for( Index i = 1; i < n; i ++ ) - result = Min( result, ( Real ) fabs( v1. getElement( i ) - v2. getElement( i ) ) ); + result = Min( result, ( Real ) fabs( v1[ i ] - v2[ i ] ) ); return result; } @@ -240,10 +251,13 @@ getVectorDifferenceL1Norm( const Vector1& v1, tnlAssert( v1. getSize() > 0, ); tnlAssert( v1. getSize() == v2. getSize(), ); - Real result = fabs( v1. getElement( 0 ) - v2. getElement( 0 ) ); + Real result( 0.0 ); const Index n = v1. getSize(); - for( Index i = 1; i < n; i ++ ) - result += fabs( v1. getElement( i ) - v2. getElement( i ) ); +#ifdef HAVE_OPENMP +#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#endif + for( Index i = 0; i < n; i ++ ) + result += fabs( v1[ i ] - v2[ i ] ); return result; } @@ -259,12 +273,14 @@ getVectorDifferenceL2Norm( const Vector1& v1, tnlAssert( v1. getSize() > 0, ); tnlAssert( v1. getSize() == v2. getSize(), ); - Real result = fabs( v1. getElement( 0 ) - v2. getElement( 0 ) ); - result *= result; + Real result( 0.0 ); const Index n = v1. getSize(); - for( Index i = 1; i < n; i ++ ) +#ifdef HAVE_OPENMP +#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#endif + for( Index i = 0; i < n; i ++ ) { - Real aux = fabs( v1. getElement( i ) - v2. getElement( i ) ); + Real aux = fabs( v1[ i ] - v2[ i ] ); result += aux * aux; } return sqrt( result ); @@ -291,9 +307,12 @@ getVectorDifferenceLpNorm( const Vector1& v1, if( p == 2.0 ) return getVectorDifferenceL2Norm( v1, v2 ); - Real result = pow( fabs( v1. getElement( 0 ) - v2. getElement( 0 ) ), p ); + Real result( 0.0 ); const Index n = v1. getSize(); - for( Index i = 1; i < n; i ++ ) +#ifdef HAVE_OPENMP +#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#endif + for( Index i = 0; i < n; i ++ ) result += pow( fabs( v1. getElement( i ) - v2. getElement( i ) ), p ); return pow( result, 1.0 / p ); } @@ -308,9 +327,12 @@ typename Vector1::RealType tnlVectorOperations< tnlHost > :: getVectorDifference tnlAssert( v1. getSize() > 0, ); tnlAssert( v1. getSize() == v2. getSize(), ); - Real result = v1. getElement( 0 ) - v2. getElement( 0 ); + Real result( 0.0 ); const Index n = v1. getSize(); - for( Index i = 1; i < n; i ++ ) +#ifdef HAVE_OPENMP +#pragma omp parallel for reduction(+:result) if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#endif + for( Index i = 0; i < n; i ++ ) result += v1. getElement( i ) - v2. getElement( i ); return result; } @@ -326,6 +348,9 @@ void tnlVectorOperations< tnlHost > :: vectorScalarMultiplication( Vector& v, tnlAssert( v. getSize() > 0, ); const Index n = v. getSize(); +#ifdef HAVE_OPENMP +#pragma omp parallel for if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#endif for( Index i = 0; i < n; i ++ ) v[ i ] *= alpha; } @@ -365,9 +390,15 @@ void tnlVectorOperations< tnlHost > :: addVector( Vector1& y, const Index n = y. getSize(); if( thisMultiplicator == 1.0 ) +#ifdef HAVE_OPENMP +#pragma omp parallel for if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#endif for( Index i = 0; i < n; i ++ ) y[ i ] += alpha * x[ i ]; else +#ifdef HAVE_OPENMP +#pragma omp parallel for if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#endif for( Index i = 0; i < n; i ++ ) y[ i ] = thisMultiplicator * y[ i ] + alpha * x[ i ]; } @@ -393,9 +424,15 @@ addVectors( Vector1& v, const Index n = v.getSize(); if( thisMultiplicator == 1.0 ) +#ifdef HAVE_OPENMP +#pragma omp parallel for if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#endif for( Index i = 0; i < n; i ++ ) v[ i ] += multiplicator1 * v1[ i ] + multiplicator2 * v2[ i ]; else +#ifdef HAVE_OPENMP +#pragma omp parallel for if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#endif for( Index i = 0; i < n; i ++ ) v[ i ] = thisMultiplicator * v[ i ] * multiplicator1 * v1[ i ] + multiplicator2 * v2[ i ]; } -- GitLab