Commit b06297bb authored by Jakub Klinkovský
Browse files

Prefetching and unrolling do not help for vector addition according to benchmarks

parent 04e118e9
Loading
Loading
Loading
Loading
+0 −37
Original line number Diff line number Diff line
@@ -520,42 +520,6 @@ addVector( Vector1& y,

   const Index n = y.getSize();

#ifdef OPTIMIZED_VECTOR_HOST_OPERATIONS
#ifdef __GNUC__
   // We need to get the address of the first element to avoid
   // bounds checking in TNL::Array::operator[]
   typedef typename Vector1::RealType Real;   
         Real* Y = y.getData();
   const Real* X = x.getData();
#endif

   Index i = 0;
   const Index unroll_limit = n - n % 4;
#ifdef HAVE_OPENMP
   #pragma omp parallel for \
      if( n > OpenMPVectorOperationsThreshold ) \
      lastprivate(i)
#endif
   for(i = 0; i < unroll_limit; i += 4)
   {
#ifdef __GNUC__
      __builtin_prefetch(&y[ i + PrefetchDistance ], 1, 0);
      __builtin_prefetch(&x[ i + PrefetchDistance ], 0, 0);
#endif
      y[ i ]     = thisMultiplicator * y[ i ]     + alpha * x[ i ];
      y[ i + 1 ] = thisMultiplicator * y[ i + 1 ] + alpha * x[ i + 1 ];
      y[ i + 2 ] = thisMultiplicator * y[ i + 2 ] + alpha * x[ i + 2 ];
      y[ i + 3 ] = thisMultiplicator * y[ i + 3 ] + alpha * x[ i + 3 ];
   }

   while( i < n )
   {
      y[i] = thisMultiplicator * y[ i ] + alpha * x[ i ];
      i++;
   }

#else // OPTIMIZED_VECTOR_HOST_OPERATIONS

   if( thisMultiplicator == 1.0 )
#ifdef HAVE_OPENMP
#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
@@ -568,7 +532,6 @@ addVector( Vector1& y,
#endif
      for( Index i = 0; i < n; i ++ )
         y[ i ] = thisMultiplicator * y[ i ] + alpha * x[ i ];
#endif // OPTIMIZED_VECTOR_HOST_OPERATIONS
}

template< typename Vector1,