From 3c5e76fd4baf305a92e731e006f824353f3a0a29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com> Date: Wed, 26 Jun 2019 18:40:26 +0200 Subject: [PATCH] Implemented function addAndReduce for expression templates. --- .../Expressions/ExpressionTemplates.h | 54 +++++++++ .../Expressions/StaticExpressionTemplates.h | 48 ++++++++ src/UnitTests/Containers/VectorTest-8.cpp | 11 ++ src/UnitTests/Containers/VectorTest-8.cu | 11 ++ src/UnitTests/Containers/VectorTest-8.h | 111 ++++++++++++++++++ 5 files changed, 235 insertions(+) create mode 100644 src/UnitTests/Containers/VectorTest-8.cpp create mode 100644 src/UnitTests/Containers/VectorTest-8.cu create mode 100644 src/UnitTests/Containers/VectorTest-8.h diff --git a/src/TNL/Containers/Expressions/ExpressionTemplates.h b/src/TNL/Containers/Expressions/ExpressionTemplates.h index f942ceb8cc..8a7863e561 100644 --- a/src/TNL/Containers/Expressions/ExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/ExpressionTemplates.h @@ -2199,6 +2199,60 @@ Result evaluateAndReduce( Vector& lhs, return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, volatileReduction, fetch, zero ); } +//// +// Addition and reduction +template< typename Vector, + typename T1, + typename T2, + template< typename, typename > class Operation, + typename Reduction, + typename VolatileReduction, + typename Result > +Result addAndReduce( Vector& lhs, + const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& expression, + Reduction& reduction, + VolatileReduction& volatileReduction, + const Result& zero ) +{ + using RealType = typename Vector::RealType; + using IndexType = typename Vector::IndexType; + using DeviceType = typename Vector::DeviceType; + + RealType* lhs_data = lhs.getData(); + auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { + const RealType aux = expression[ i ]; + lhs_data[ i ] += aux; + return aux; + }; + return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, volatileReduction, fetch, zero ); +} + +template< typename Vector, + typename T1, + template< typename > class Operation, + typename Reduction, + typename VolatileReduction, + typename Result > +Result addAndReduce( Vector& lhs, + const Containers::Expressions::UnaryExpressionTemplate< T1, Operation >& expression, + Reduction& reduction, + VolatileReduction& volatileReduction, + const Result& zero ) +{ + using RealType = typename Vector::RealType; + using IndexType = typename Vector::IndexType; + using DeviceType = typename Vector::DeviceType; + + RealType* lhs_data = lhs.getData(); + auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { + const RealType aux = expression[ i ]; + lhs_data[ i ] += aux; + return aux; + }; + return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, volatileReduction, fetch, zero ); +} + + //// // Output stream template< typename T1, diff --git a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h index df045bbba2..cec0d3ce22 100644 --- a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h @@ -2429,4 +2429,52 @@ Result evaluateAndReduce( Vector& lhs, return result; } +//// +// Addition with reduction +template< typename Vector, + typename T1, + typename T2, + template< typename, typename > class Operation, + typename Reduction, + typename VolatileReduction, + typename Result > +__cuda_callable__ +Result addAndReduce( Vector& lhs, + const Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& expression, + Reduction& reduction, + VolatileReduction& volatileReduction, + const Result& zero ) +{ + Result result( zero ); + for( int i = 0; i < Vector::getSize(); i++ ) { + const Result aux = expression[ i ]; + lhs[ i ] += aux; + reduction( result, aux ); + } + return result; +} + +template< typename Vector, + typename T1, + template< typename > class Operation, + typename Reduction, + typename VolatileReduction, + typename Result > +__cuda_callable__ +Result addAndReduce( Vector& lhs, + const Containers::Expressions::StaticUnaryExpressionTemplate< T1, Operation >& expression, + Reduction& reduction, + VolatileReduction& volatileReduction, + const Result& zero ) +{ + Result result( zero ); + for( int i = 0; i < Vector::getSize(); i++ ) { + const Result aux = expression[ i ]; + lhs[ i ] += aux; + reduction( result, aux ); + } + return result; +} + + } // namespace TNL diff --git a/src/UnitTests/Containers/VectorTest-8.cpp b/src/UnitTests/Containers/VectorTest-8.cpp new file mode 100644 index 0000000000..16b6e4a0fc --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-8.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + VectorTest-8.cpp - description + ------------------- + begin : Jan 26, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "VectorTest-8.h" diff --git a/src/UnitTests/Containers/VectorTest-8.cu b/src/UnitTests/Containers/VectorTest-8.cu new file mode 100644 index 0000000000..95b77c840c --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-8.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + VectorTest-8.cu - description + ------------------- + begin : Jan 26, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "VectorTest-8.h" diff --git a/src/UnitTests/Containers/VectorTest-8.h b/src/UnitTests/Containers/VectorTest-8.h new file mode 100644 index 0000000000..7b3e908df3 --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-8.h @@ -0,0 +1,111 @@ +/*************************************************************************** + VectorTest-8.h - description + ------------------- + begin : Jan 26, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +// NOTE: Vector = Array + VectorOperations, so we test Vector and VectorOperations at the same time + +#pragma once + +#ifdef HAVE_GTEST +#include <limits> + +#include <TNL/Experimental/Arithmetics/Quad.h> +#include <TNL/Containers/Vector.h> +#include <TNL/Containers/VectorView.h> +#include "VectorTestSetup.h" + +#include "gtest/gtest.h" + +using namespace TNL; +using namespace TNL::Containers; +using namespace TNL::Containers::Algorithms; +using namespace TNL::Arithmetics; + +// should be small enough to have fast tests, but larger than minGPUReductionDataSize +// and large enough to require multiple CUDA blocks for reduction +constexpr int VECTOR_TEST_SIZE = 500; + +// NOTE: The following lambdas cannot be inside the test because of nvcc ( v. 10.1.105 ) +// error #3049-D: The enclosing parent function ("TestBody") for an extended __host__ __device__ lambda cannot have private or protected access within its class +template< typename VectorView > +typename VectorView::RealType +performEvaluateAndReduce( VectorView& u, VectorView& v, VectorView& w ) +{ + using RealType = typename VectorView::RealType; + + auto reduction = [] __cuda_callable__ ( RealType& a, const RealType& b ) { a += b; }; + auto volatileReduction = [] __cuda_callable__ ( volatile RealType& a, volatile RealType& b ) { a += b; }; + return evaluateAndReduce( w, u * v, reduction, volatileReduction, ( RealType ) 0.0 ); +} + +TYPED_TEST( VectorTest, evaluateAndReduce ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + using IndexType = typename VectorType::IndexType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ), _w( size ); + ViewType u( _u ), v( _v ), w( _w ); + RealType aux( 0.0 ); + for( int i = 0; i < size; i++ ) + { + const RealType x = i; + const RealType y = size / 2 - i; + u.setElement( i, x ); + v.setElement( i, y ); + aux += x * y; + } + auto r = performEvaluateAndReduce( u, v, w ); + EXPECT_TRUE( w == u * v ); + EXPECT_NEAR( aux, r, 1.0e-5 ); +} + +// NOTE: The following lambdas cannot be inside the test because of nvcc ( v. 10.1.105 ) +// error #3049-D: The enclosing parent function ("TestBody") for an extended __host__ __device__ lambda cannot have private or protected access within its class +template< typename VectorView > +typename VectorView::RealType +performAddAndReduce( VectorView& u, VectorView& v, VectorView& w ) +{ + using RealType = typename VectorView::RealType; + + auto reduction = [] __cuda_callable__ ( RealType& a, const RealType& b ) { a += b; }; + auto volatileReduction = [] __cuda_callable__ ( volatile RealType& a, volatile RealType& b ) { a += b; }; + return addAndReduce( w, u * v, reduction, volatileReduction, ( RealType ) 0.0 ); +} + +TYPED_TEST( VectorTest, addAndReduce ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + using IndexType = typename VectorType::IndexType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ), _w( size ); + ViewType u( _u ), v( _v ), w( _w ); + RealType aux( 0.0 ); + for( int i = 0; i < size; i++ ) + { + const RealType x = i; + const RealType y = size / 2 - i; + u.setElement( i, x ); + v.setElement( i, y ); + w.setElement( i, x ); + aux += x * y; + } + auto r = performAddAndReduce( u, v, w ); + EXPECT_TRUE( w == u * v + u ); + EXPECT_NEAR( aux, r, 1.0e-5 ); +} + +#endif // HAVE_GTEST + +#include "../main.h" -- GitLab