From 3c5e76fd4baf305a92e731e006f824353f3a0a29 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Wed, 26 Jun 2019 18:40:26 +0200
Subject: [PATCH] Implemented function addAndReduce for expression templates.

---
 .../Expressions/ExpressionTemplates.h         |  54 +++++++++
 .../Expressions/StaticExpressionTemplates.h   |  48 ++++++++
 src/UnitTests/Containers/VectorTest-8.cpp     |  11 ++
 src/UnitTests/Containers/VectorTest-8.cu      |  11 ++
 src/UnitTests/Containers/VectorTest-8.h       | 111 ++++++++++++++++++
 5 files changed, 235 insertions(+)
 create mode 100644 src/UnitTests/Containers/VectorTest-8.cpp
 create mode 100644 src/UnitTests/Containers/VectorTest-8.cu
 create mode 100644 src/UnitTests/Containers/VectorTest-8.h

diff --git a/src/TNL/Containers/Expressions/ExpressionTemplates.h b/src/TNL/Containers/Expressions/ExpressionTemplates.h
index f942ceb8cc..8a7863e561 100644
--- a/src/TNL/Containers/Expressions/ExpressionTemplates.h
+++ b/src/TNL/Containers/Expressions/ExpressionTemplates.h
@@ -2199,6 +2199,60 @@ Result evaluateAndReduce( Vector& lhs,
    return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, volatileReduction, fetch, zero );
 }
 
+////
+// Addition and reduction
+template< typename Vector,
+   typename T1,
+   typename T2,
+   template< typename, typename > class Operation,
+   typename Reduction,
+   typename VolatileReduction,
+   typename Result >
+Result addAndReduce( Vector& lhs,
+   const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& expression,
+   Reduction& reduction,
+   VolatileReduction& volatileReduction,
+   const Result& zero )
+{
+   using RealType = typename Vector::RealType;
+   using IndexType = typename Vector::IndexType;
+   using DeviceType = typename Vector::DeviceType;
+
+   RealType* lhs_data = lhs.getData();
+   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType {
+      const RealType aux = expression[ i ];
+      lhs_data[ i ] += aux;
+      return aux;
+   };
+   return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, volatileReduction, fetch, zero );
+}
+
+template< typename Vector,
+   typename T1,
+   template< typename > class Operation,
+   typename Reduction,
+   typename VolatileReduction,
+   typename Result >
+Result addAndReduce( Vector& lhs,
+   const Containers::Expressions::UnaryExpressionTemplate< T1, Operation >& expression,
+   Reduction& reduction,
+   VolatileReduction& volatileReduction,
+   const Result& zero )
+{
+   using RealType = typename Vector::RealType;
+   using IndexType = typename Vector::IndexType;
+   using DeviceType = typename Vector::DeviceType;
+
+   RealType* lhs_data = lhs.getData();
+   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType {
+      const RealType aux = expression[ i ];
+      lhs_data[ i ] += aux;
+      return aux;
+   };
+   return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, volatileReduction, fetch, zero );
+}
+
+
 ////
 // Output stream
 template< typename T1,
diff --git a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h
index df045bbba2..cec0d3ce22 100644
--- a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h
+++ b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h
@@ -2429,4 +2429,52 @@ Result evaluateAndReduce( Vector& lhs,
    return result;
 }
 
+////
+// Addition with reduction
+template< typename Vector,
+   typename T1,
+   typename T2,
+   template< typename, typename > class Operation,
+   typename Reduction,
+   typename VolatileReduction,
+   typename Result >
+__cuda_callable__
+Result addAndReduce( Vector& lhs,
+   const Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& expression,
+   Reduction& reduction,
+   VolatileReduction& volatileReduction,
+   const Result& zero )
+{
+   Result result( zero );
+   for( int i = 0; i < Vector::getSize(); i++ ) {
+      const Result aux = expression[ i ];
+      lhs[ i ] += aux;
+      reduction( result, aux );
+   }
+   return result;
+}
+
+template< typename Vector,
+   typename T1,
+   template< typename > class Operation,
+   typename Reduction,
+   typename VolatileReduction,
+   typename Result >
+__cuda_callable__
+Result addAndReduce( Vector& lhs,
+   const Containers::Expressions::StaticUnaryExpressionTemplate< T1, Operation >& expression,
+   Reduction& reduction,
+   VolatileReduction& volatileReduction,
+   const Result& zero )
+{
+   Result result( zero );
+   for( int i = 0; i < Vector::getSize(); i++ ) {
+      const Result aux = expression[ i ];
+      lhs[ i ] += aux;
+      reduction( result, aux );
+   }
+   return result;
+}
+
+
 } // namespace TNL
diff --git a/src/UnitTests/Containers/VectorTest-8.cpp b/src/UnitTests/Containers/VectorTest-8.cpp
new file mode 100644
index 0000000000..16b6e4a0fc
--- /dev/null
+++ b/src/UnitTests/Containers/VectorTest-8.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          VectorTest-8.cpp  -  description
+                             -------------------
+    begin                : Jan 26, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "VectorTest-8.h"
diff --git a/src/UnitTests/Containers/VectorTest-8.cu b/src/UnitTests/Containers/VectorTest-8.cu
new file mode 100644
index 0000000000..95b77c840c
--- /dev/null
+++ b/src/UnitTests/Containers/VectorTest-8.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          VectorTest-8.cu  -  description
+                             -------------------
+    begin                : Jan 26, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "VectorTest-8.h"
diff --git a/src/UnitTests/Containers/VectorTest-8.h b/src/UnitTests/Containers/VectorTest-8.h
new file mode 100644
index 0000000000..7b3e908df3
--- /dev/null
+++ b/src/UnitTests/Containers/VectorTest-8.h
@@ -0,0 +1,111 @@
+/***************************************************************************
+                          VectorTest-8.h  -  description
+                             -------------------
+    begin                : Jan 26, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// NOTE: Vector = Array + VectorOperations, so we test Vector and VectorOperations at the same time
+
+#pragma once
+
+#ifdef HAVE_GTEST
+#include <limits>
+
+#include <TNL/Experimental/Arithmetics/Quad.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+#include "VectorTestSetup.h"
+
+#include "gtest/gtest.h"
+
+using namespace TNL;
+using namespace TNL::Containers;
+using namespace TNL::Containers::Algorithms;
+using namespace TNL::Arithmetics;
+
+// should be small enough to have fast tests, but larger than minGPUReductionDataSize
+// and large enough to require multiple CUDA blocks for reduction
+constexpr int VECTOR_TEST_SIZE = 500;
+
+// NOTE: The following lambdas cannot be inside the test because of nvcc ( v. 10.1.105 )
+// error #3049-D: The enclosing parent function ("TestBody") for an extended __host__ __device__ lambda cannot have private or protected access within its class
+template< typename VectorView >
+typename VectorView::RealType
+performEvaluateAndReduce( VectorView& u, VectorView& v, VectorView& w )
+{
+   using RealType = typename VectorView::RealType;
+
+   auto reduction = [] __cuda_callable__ ( RealType& a, const RealType& b ) { a += b; };
+   auto volatileReduction = [] __cuda_callable__ ( volatile RealType& a, volatile RealType& b ) { a += b; };
+   return evaluateAndReduce( w, u * v, reduction, volatileReduction, ( RealType ) 0.0 );
+}
+
+TYPED_TEST( VectorTest, evaluateAndReduce )
+{
+   using VectorType = typename TestFixture::VectorType;
+   using ViewType = typename TestFixture::ViewType;
+   using RealType = typename VectorType::RealType;
+   using IndexType = typename VectorType::IndexType;
+   const int size = VECTOR_TEST_SIZE;
+
+   VectorType _u( size ), _v( size ), _w( size );
+   ViewType u( _u ), v( _v ), w( _w );
+   RealType aux( 0.0 );
+   for( int i = 0; i < size; i++ )
+   {
+      const RealType x = i;
+      const RealType y = size / 2 - i;
+      u.setElement( i, x );
+      v.setElement( i, y );
+      aux += x * y;
+   }
+   auto r = performEvaluateAndReduce( u, v, w );
+   EXPECT_TRUE( w == u * v );
+   EXPECT_NEAR( aux, r, 1.0e-5 );
+}
+
+// NOTE: The following lambdas cannot be inside the test because of nvcc ( v. 10.1.105 )
+// error #3049-D: The enclosing parent function ("TestBody") for an extended __host__ __device__ lambda cannot have private or protected access within its class
+template< typename VectorView >
+typename VectorView::RealType
+performAddAndReduce( VectorView& u, VectorView& v, VectorView& w )
+{
+   using RealType = typename VectorView::RealType;
+
+   auto reduction = [] __cuda_callable__ ( RealType& a, const RealType& b ) { a += b; };
+   auto volatileReduction = [] __cuda_callable__ ( volatile RealType& a, volatile RealType& b ) { a += b; };
+   return addAndReduce( w, u * v, reduction, volatileReduction, ( RealType ) 0.0 );
+}
+
+TYPED_TEST( VectorTest, addAndReduce )
+{
+   using VectorType = typename TestFixture::VectorType;
+   using ViewType = typename TestFixture::ViewType;
+   using RealType = typename VectorType::RealType;
+   using IndexType = typename VectorType::IndexType;
+   const int size = VECTOR_TEST_SIZE;
+
+   VectorType _u( size ), _v( size ), _w( size );
+   ViewType u( _u ), v( _v ), w( _w );
+   RealType aux( 0.0 );
+   for( int i = 0; i < size; i++ )
+   {
+      const RealType x = i;
+      const RealType y = size / 2 - i;
+      u.setElement( i, x );
+      v.setElement( i, y );
+      w.setElement( i, x );
+      aux += x * y;
+   }
+   auto r = performAddAndReduce( u, v, w );
+   EXPECT_TRUE( w == u * v + u );
+   EXPECT_NEAR( aux, r, 1.0e-5 );
+}
+
+#endif // HAVE_GTEST
+
+#include "../main.h"
-- 
GitLab