From d712846b164965fbd9a2a57a87a116fe3e7aae2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Tue, 7 Jul 2020 18:29:12 +0200
Subject: [PATCH 01/15] Cleaned up redundant code in ndarray tests

---
 .../DistributedNDArrayOverlaps_1D_test.h      | 60 +------------------
 .../DistributedNDArrayOverlaps_semi1D_test.h  | 60 +------------------
 .../ndarray/DistributedNDArray_1D_test.h      | 60 +------------------
 .../ndarray/DistributedNDArray_semi1D_test.h  | 60 +------------------
 .../Containers/ndarray/NDArrayTest.h          | 14 ++---
 .../Containers/ndarray/NDSubarrayTest.cpp     | 14 ++---
 .../Containers/ndarray/SlicedNDArrayTest.cpp  | 14 ++---
 .../ndarray/StaticNDArrayCudaTest.cu          | 14 ++---
 .../Containers/ndarray/StaticNDArrayTest.cpp  | 14 ++---
 9 files changed, 24 insertions(+), 286 deletions(-)
diff --git a/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_1D_test.h b/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_1D_test.h
index a7609ee74..7377cbff2 100644
--- a/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_1D_test.h
+++ b/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_1D_test.h
@@ -308,62 +308,4 @@ TYPED_TEST( DistributedNDArrayOverlaps_1D_test, synchronize )
 #endif  // HAVE_GTEST
 
 
-#if (defined(HAVE_GTEST) && defined(HAVE_MPI))
-using CommunicatorType = Communicators::MpiCommunicator;
-
-#include <sstream>
-
-class MinimalistBufferedPrinter
-: public ::testing::EmptyTestEventListener
-{
-private:
-   std::stringstream sout;
-
-public:
-   // Called before a test starts.
-   virtual void OnTestStart(const ::testing::TestInfo& test_info)
-   {
-      sout << test_info.test_case_name() << "." << test_info.name() << " Start." << std::endl;
-   }
-
-   // Called after a failed assertion or a SUCCEED() invocation.
-   virtual void OnTestPartResult(const ::testing::TestPartResult& test_part_result)
-   {
-      sout << (test_part_result.failed() ? "====Failure=== " : "===Success=== ")
-           << test_part_result.file_name() << " "
-           << test_part_result.line_number() <<std::endl
-           << test_part_result.summary() <<std::endl;
-   }
-
-   // Called after a test ends.
-   virtual void OnTestEnd(const ::testing::TestInfo& test_info)
-   {
-      const int rank = CommunicatorType::GetRank(CommunicatorType::AllGroup);
-      sout << test_info.test_case_name() << "." << test_info.name() << " End." <<std::endl;
-      std::cout << rank << ":" << std::endl << sout.str()<< std::endl;
-      sout.str( std::string() );
-      sout.clear();
-   }
-};
-#endif
-
-#include "../../GtestMissingError.h"
-int main( int argc, char* argv[] )
-{
-#ifdef HAVE_GTEST
-   ::testing::InitGoogleTest( &argc, argv );
-
-   #ifdef HAVE_MPI
-      ::testing::TestEventListeners& listeners =
-         ::testing::UnitTest::GetInstance()->listeners();
-
-      delete listeners.Release(listeners.default_result_printer());
-      listeners.Append(new MinimalistBufferedPrinter);
-
-      Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv);
-   #endif
-   return RUN_ALL_TESTS();
-#else
-   throw GtestMissingError();
-#endif
-}
+#include "../../main_mpi.h"
diff --git a/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_semi1D_test.h b/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_semi1D_test.h
index a7f28ead5..f1ac970eb 100644
--- a/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_semi1D_test.h
+++ b/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_semi1D_test.h
@@ -348,62 +348,4 @@ void test_helper_synchronize( DistributedArray& a, const int rank, const int npr
 #endif  // HAVE_GTEST
 
 
-#if (defined(HAVE_GTEST) && defined(HAVE_MPI))
-using CommunicatorType = Communicators::MpiCommunicator;
-
-#include <sstream>
-
-class MinimalistBufferedPrinter
-: public ::testing::EmptyTestEventListener
-{
-private:
-   std::stringstream sout;
-
-public:
-   // Called before a test starts.
-   virtual void OnTestStart(const ::testing::TestInfo& test_info)
-   {
-      sout << test_info.test_case_name() << "." << test_info.name() << " Start." << std::endl;
-   }
-
-   // Called after a failed assertion or a SUCCEED() invocation.
-   virtual void OnTestPartResult(const ::testing::TestPartResult& test_part_result)
-   {
-      sout << (test_part_result.failed() ? "====Failure=== " : "===Success=== ")
-           << test_part_result.file_name() << " "
-           << test_part_result.line_number() <<std::endl
-           << test_part_result.summary() <<std::endl;
-   }
-
-   // Called after a test ends.
-   virtual void OnTestEnd(const ::testing::TestInfo& test_info)
-   {
-      const int rank = CommunicatorType::GetRank(CommunicatorType::AllGroup);
-      sout << test_info.test_case_name() << "." << test_info.name() << " End." <<std::endl;
-      std::cout << rank << ":" << std::endl << sout.str()<< std::endl;
-      sout.str( std::string() );
-      sout.clear();
-   }
-};
-#endif
-
-#include "../../GtestMissingError.h"
-int main( int argc, char* argv[] )
-{
-#ifdef HAVE_GTEST
-   ::testing::InitGoogleTest( &argc, argv );
-
-   #ifdef HAVE_MPI
-      ::testing::TestEventListeners& listeners =
-         ::testing::UnitTest::GetInstance()->listeners();
-
-      delete listeners.Release(listeners.default_result_printer());
-      listeners.Append(new MinimalistBufferedPrinter);
-
-      Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv);
-   #endif
-   return RUN_ALL_TESTS();
-#else
-   throw GtestMissingError();
-#endif
-}
+#include "../../main_mpi.h"
diff --git a/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h b/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h
index 04afb91a4..a8d3bcdab 100644
--- a/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h
+++ b/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h
@@ -495,62 +495,4 @@ TYPED_TEST( DistributedNDArray_1D_test, forOverlaps )
 #endif  // HAVE_GTEST
 
 
-#if (defined(HAVE_GTEST) && defined(HAVE_MPI))
-using CommunicatorType = Communicators::MpiCommunicator;
-
-#include <sstream>
-
-class MinimalistBufferedPrinter
-: public ::testing::EmptyTestEventListener
-{
-private:
-   std::stringstream sout;
-
-public:
-   // Called before a test starts.
-   virtual void OnTestStart(const ::testing::TestInfo& test_info)
-   {
-      sout << test_info.test_case_name() << "." << test_info.name() << " Start." << std::endl;
-   }
-
-   // Called after a failed assertion or a SUCCEED() invocation.
-   virtual void OnTestPartResult(const ::testing::TestPartResult& test_part_result)
-   {
-      sout << (test_part_result.failed() ? "====Failure=== " : "===Success=== ")
-           << test_part_result.file_name() << " "
-           << test_part_result.line_number() <<std::endl
-           << test_part_result.summary() <<std::endl;
-   }
-
-   // Called after a test ends.
-   virtual void OnTestEnd(const ::testing::TestInfo& test_info)
-   {
-      const int rank = CommunicatorType::GetRank(CommunicatorType::AllGroup);
-      sout << test_info.test_case_name() << "." << test_info.name() << " End." <<std::endl;
-      std::cout << rank << ":" << std::endl << sout.str()<< std::endl;
-      sout.str( std::string() );
-      sout.clear();
-   }
-};
-#endif
-
-#include "../../GtestMissingError.h"
-int main( int argc, char* argv[] )
-{
-#ifdef HAVE_GTEST
-   ::testing::InitGoogleTest( &argc, argv );
-
-   #ifdef HAVE_MPI
-      ::testing::TestEventListeners& listeners =
-         ::testing::UnitTest::GetInstance()->listeners();
-
-      delete listeners.Release(listeners.default_result_printer());
-      listeners.Append(new MinimalistBufferedPrinter);
-
-      Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv);
-   #endif
-   return RUN_ALL_TESTS();
-#else
-   throw GtestMissingError();
-#endif
-}
+#include "../../main_mpi.h"
diff --git a/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h b/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h
index 17108509d..6f777c215 100644
--- a/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h
+++ b/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h
@@ -480,62 +480,4 @@ TYPED_TEST( DistributedNDArray_semi1D_test, forOverlaps )
 #endif  // HAVE_GTEST
 
 
-#if (defined(HAVE_GTEST) && defined(HAVE_MPI))
-using CommunicatorType = Communicators::MpiCommunicator;
-
-#include <sstream>
-
-class MinimalistBufferedPrinter
-: public ::testing::EmptyTestEventListener
-{
-private:
-   std::stringstream sout;
-
-public:
-   // Called before a test starts.
-   virtual void OnTestStart(const ::testing::TestInfo& test_info)
-   {
-      sout << test_info.test_case_name() << "." << test_info.name() << " Start." << std::endl;
-   }
-
-   // Called after a failed assertion or a SUCCEED() invocation.
-   virtual void OnTestPartResult(const ::testing::TestPartResult& test_part_result)
-   {
-      sout << (test_part_result.failed() ? "====Failure=== " : "===Success=== ")
-           << test_part_result.file_name() << " "
-           << test_part_result.line_number() <<std::endl
-           << test_part_result.summary() <<std::endl;
-   }
-
-   // Called after a test ends.
-   virtual void OnTestEnd(const ::testing::TestInfo& test_info)
-   {
-      const int rank = CommunicatorType::GetRank(CommunicatorType::AllGroup);
-      sout << test_info.test_case_name() << "." << test_info.name() << " End." <<std::endl;
-      std::cout << rank << ":" << std::endl << sout.str()<< std::endl;
-      sout.str( std::string() );
-      sout.clear();
-   }
-};
-#endif
-
-#include "../../GtestMissingError.h"
-int main( int argc, char* argv[] )
-{
-#ifdef HAVE_GTEST
-   ::testing::InitGoogleTest( &argc, argv );
-
-   #ifdef HAVE_MPI
-      ::testing::TestEventListeners& listeners =
-         ::testing::UnitTest::GetInstance()->listeners();
-
-      delete listeners.Release(listeners.default_result_printer());
-      listeners.Append(new MinimalistBufferedPrinter);
-
-      Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv);
-   #endif
-   return RUN_ALL_TESTS();
-#else
-   throw GtestMissingError();
-#endif
-}
+#include "../../main_mpi.h"
diff --git a/src/UnitTests/Containers/ndarray/NDArrayTest.h b/src/UnitTests/Containers/ndarray/NDArrayTest.h
index 1e5d9a30c..3d2c85888 100644
--- a/src/UnitTests/Containers/ndarray/NDArrayTest.h
+++ b/src/UnitTests/Containers/ndarray/NDArrayTest.h
@@ -1,3 +1,4 @@
+#ifdef HAVE_GTEST
 #include "gtest/gtest.h"
 
 #include <TNL/Containers/NDArray.h>
@@ -1325,14 +1326,7 @@ TEST( NDArrayTest, forBoundary_static_6D )
                << "i = " << i << ", j = " << j << ", k = " << k << ", l = " << l << ", m = " << m << ", n = " << n;
     }
 }
+#endif // HAVE_GTEST
 
-//#include "GtestMissingError.h"
-int main( int argc, char* argv[] )
-{
-//#ifdef HAVE_GTEST
-   ::testing::InitGoogleTest( &argc, argv );
-   return RUN_ALL_TESTS();
-//#else
-//   throw GtestMissingError();
-//#endif
-}
+
+#include "../../main.h"
diff --git a/src/UnitTests/Containers/ndarray/NDSubarrayTest.cpp b/src/UnitTests/Containers/ndarray/NDSubarrayTest.cpp
index 1b57eed28..f4f44003f 100644
--- a/src/UnitTests/Containers/ndarray/NDSubarrayTest.cpp
+++ b/src/UnitTests/Containers/ndarray/NDSubarrayTest.cpp
@@ -1,3 +1,4 @@
+#ifdef HAVE_GTEST
 #include "gtest/gtest.h"
 
 #include <TNL/Containers/NDArray.h>
@@ -392,14 +393,7 @@ TEST( NDArraySubarrayTest, Dynamic_6D )
     }
     a.setValue( 0 );
 }
+#endif // HAVE_GTEST
 
-//#include "GtestMissingError.h"
-int main( int argc, char* argv[] )
-{
-//#ifdef HAVE_GTEST
-   ::testing::InitGoogleTest( &argc, argv );
-   return RUN_ALL_TESTS();
-//#else
-//   throw GtestMissingError();
-//#endif
-}
+
+#include "../../main.h"
diff --git a/src/UnitTests/Containers/ndarray/SlicedNDArrayTest.cpp b/src/UnitTests/Containers/ndarray/SlicedNDArrayTest.cpp
index 8574a5602..eda47a615 100644
--- a/src/UnitTests/Containers/ndarray/SlicedNDArrayTest.cpp
+++ b/src/UnitTests/Containers/ndarray/SlicedNDArrayTest.cpp
@@ -1,3 +1,4 @@
+#ifdef HAVE_GTEST
 #include "gtest/gtest.h"
 
 #include <TNL/Containers/NDArray.h>
@@ -238,14 +239,7 @@ TEST( SlicedNDArrayTest, CopySemantics )
     EXPECT_EQ( a.getConstView(), c.getConstView() );
     EXPECT_EQ( a.getConstView(), c_view.getConstView() );
 }
+#endif // HAVE_GTEST
 
-//#include "GtestMissingError.h"
-int main( int argc, char* argv[] )
-{
-//#ifdef HAVE_GTEST
-   ::testing::InitGoogleTest( &argc, argv );
-   return RUN_ALL_TESTS();
-//#else
-//   throw GtestMissingError();
-//#endif
-}
+
+#include "../../main.h"
diff --git a/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu b/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu
index 5a0561955..f2c83ba16 100644
--- a/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu
+++ b/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu
@@ -1,3 +1,4 @@
+#ifdef HAVE_GTEST
 #include "gtest/gtest.h"
 
 #include <TNL/Containers/NDArray.h>
@@ -76,14 +77,7 @@ TEST( StaticNDArrayCudaTest, CopyFromArray )
 {
     __test_CopyFromArray();
 }
+#endif // HAVE_GTEST
 
-//#include "GtestMissingError.h"
-int main( int argc, char* argv[] )
-{
-//#ifdef HAVE_GTEST
-   ::testing::InitGoogleTest( &argc, argv );
-   return RUN_ALL_TESTS();
-//#else
-//   throw GtestMissingError();
-//#endif
-}
+
+#include "../../main.h"
diff --git a/src/UnitTests/Containers/ndarray/StaticNDArrayTest.cpp b/src/UnitTests/Containers/ndarray/StaticNDArrayTest.cpp
index e3ea290f2..23ffd8066 100644
--- a/src/UnitTests/Containers/ndarray/StaticNDArrayTest.cpp
+++ b/src/UnitTests/Containers/ndarray/StaticNDArrayTest.cpp
@@ -1,3 +1,4 @@
+#ifdef HAVE_GTEST
 #include "gtest/gtest.h"
 
 #include <TNL/Containers/NDArray.h>
@@ -92,14 +93,7 @@ TEST( StaticNDArrayTest, CopySemantics )
     EXPECT_EQ( a.getConstView(), c.getConstView() );
     EXPECT_EQ( a.getConstView(), c_view.getConstView() );
 }
+#endif // HAVE_GTEST
 
-//#include "GtestMissingError.h"
-int main( int argc, char* argv[] )
-{
-//#ifdef HAVE_GTEST
-   ::testing::InitGoogleTest( &argc, argv );
-   return RUN_ALL_TESTS();
-//#else
-//   throw GtestMissingError();
-//#endif
-}
+
+#include "../../main.h"
-- 
GitLab


From a50188ff1b3fdad6809c201e67f9210662dc882e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Tue, 7 Jul 2020 23:53:36 +0200
Subject: [PATCH 02/15] Tests: added overloads for the GTest's PrintTo function
 to force the use of our operator<< for vectors

---
 src/UnitTests/GtestPrintToOverrides.h | 35 +++++++++++++++++++++++++++
 src/UnitTests/main.h                  |  1 +
 src/UnitTests/main_mpi.h              |  1 +
 3 files changed, 37 insertions(+)
 create mode 100644 src/UnitTests/GtestPrintToOverrides.h

diff --git a/src/UnitTests/GtestPrintToOverrides.h b/src/UnitTests/GtestPrintToOverrides.h
new file mode 100644
index 000000000..ed545890e
--- /dev/null
+++ b/src/UnitTests/GtestPrintToOverrides.h
@@ -0,0 +1,35 @@
+#pragma once
+
+// GTest does not use our operator<< for these vector types by default, so PrintTo is overloaded for them to force its use.
+// https://stackoverflow.com/a/25265174
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Containers/StaticVector.h>
+
+namespace TNL {
+namespace Containers {
+
+template< typename Value, typename Device, typename Index, typename Allocator >
+void PrintTo( const Vector< Value, Device, Index, Allocator >& vec,
+              std::ostream *str )
+{
+   *str << vec;
+}
+
+template< typename Value, typename Device, typename Index >
+void PrintTo( const VectorView< Value, Device, Index >& vec,
+              std::ostream *str )
+{
+   *str << vec;
+}
+
+template< int Size, typename Value >
+void PrintTo( const StaticVector< Size, Value >& vec,
+              std::ostream *str )
+{
+   *str << vec;
+}
+
+} // namespace Containers
+} // namespace TNL
diff --git a/src/UnitTests/main.h b/src/UnitTests/main.h
index 87da37fa8..00cb11be6 100644
--- a/src/UnitTests/main.h
+++ b/src/UnitTests/main.h
@@ -1,5 +1,6 @@
 #ifdef HAVE_GTEST
 #include <gtest/gtest.h>
+#include "GtestPrintToOverrides.h"
 #else
 #include "GtestMissingError.h"
 #endif
diff --git a/src/UnitTests/main_mpi.h b/src/UnitTests/main_mpi.h
index 3c7c39a86..9fe75c850 100644
--- a/src/UnitTests/main_mpi.h
+++ b/src/UnitTests/main_mpi.h
@@ -1,5 +1,6 @@
 #ifdef HAVE_GTEST
 #include <gtest/gtest.h>
+#include "GtestPrintToOverrides.h"
 #else
 #include "GtestMissingError.h"
 #endif
-- 
GitLab


From c53284ebe77d4d046e922bbb83a1f5c300e524d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Tue, 7 Jul 2020 11:17:21 +0200
Subject: [PATCH 03/15] CMakeLists.txt: enabled address and undefined
 sanitizers for Debug builds

---
 CMakeLists.txt | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c9e570042..27451ef82 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -127,6 +127,16 @@ if( CMAKE_CXX_COMPILER_ID STREQUAL "Clang" )
    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-self-assign-overloaded" )
 endif()
 
+# Enable the address and undefined-behavior sanitizers for Debug builds with GCC and Clang.
+# They are skipped with MPI (many false positives) and with CUDA (does not work with nvcc at all).
+# WITH_MPI and WITH_CUDA are referenced by name, so an unset variable is treated as false instead of breaking the if() syntax.
+if( ( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" ) AND NOT WITH_MPI AND NOT WITH_CUDA )
+   set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address -fsanitize=undefined -fno-omit-frame-pointer" )
+   set( CMAKE_SHARED_LIBRARY_LINK_C_FLAGS_DEBUG "${CMAKE_SHARED_LIBRARY_LINK_C_FLAGS_DEBUG} -fsanitize=address -fsanitize=undefined" )
+   set( CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} -fsanitize=address -fsanitize=undefined" )
+   set( CMAKE_SHARED_LINKER_FLAGS_DEBUG "${CMAKE_SHARED_LINKER_FLAGS_DEBUG} -fsanitize=address -fsanitize=undefined" )
+endif()
+
 # enable link time optimizations (but not in continuous integration)
 if( NOT DEFINED ENV{CI_JOB_NAME} )
    if( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" )
-- 
GitLab


From f1d3d92a9b1c1e4ddf8508a55048586af0ba4a26 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Mon, 6 Jul 2020 16:59:11 +0200
Subject: [PATCH 04/15] Added OperandMemberType to avoid storing
 const-reference to temporary objects created in subexpressions

---
 .../Expressions/HorizontalOperations.h        |  2 +-
 .../Expressions/StaticExpressionTemplates.h   | 14 ++++-----
 src/TNL/Containers/Expressions/TypeTraits.h   | 31 +++++++++++++++++--
 3 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/src/TNL/Containers/Expressions/HorizontalOperations.h b/src/TNL/Containers/Expressions/HorizontalOperations.h
index cb49f7d47..614f2c878 100644
--- a/src/TNL/Containers/Expressions/HorizontalOperations.h
+++ b/src/TNL/Containers/Expressions/HorizontalOperations.h
@@ -60,7 +60,7 @@ struct Min
 {
    template< typename T1, typename T2 >
    __cuda_callable__
-   static auto evaluate( const T1& a, const T2& b ) -> decltype( min( a , b ) )
+   static auto evaluate( const T1& a, const T2& b ) -> decltype( min( a, b ) )
    {
       return min( a, b );
    }
diff --git a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h
index 89e13d7f1..9ae232a06 100644
--- a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h
+++ b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h
@@ -103,8 +103,8 @@ struct StaticBinaryExpressionTemplate< T1, T2, Operation, VectorExpressionVariab
    }
 
 protected:
-   const T1& op1;
-   const T2& op2;
+   typename OperandMemberType< T1 >::type op1;
+   typename OperandMemberType< T2 >::type op2;
 };
 
 template< typename T1,
@@ -151,8 +151,8 @@ struct StaticBinaryExpressionTemplate< T1, T2, Operation, VectorExpressionVariab
    }
 
 protected:
-   const T1& op1;
-   const T2 op2;
+   typename OperandMemberType< T1 >::type op1;
+   typename OperandMemberType< T2 >::type op2;
 };
 
 template< typename T1,
@@ -199,8 +199,8 @@ struct StaticBinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, Ve
    }
 
 protected:
-   const T1 op1;
-   const T2& op2;
+   typename OperandMemberType< T1 >::type op1;
+   typename OperandMemberType< T2 >::type op2;
 };
 
 ////
@@ -248,7 +248,7 @@ struct StaticUnaryExpressionTemplate
    }
 
 protected:
-   const T1& operand;
+   typename OperandMemberType< T1 >::type operand;
 };
 
 #ifndef DOXYGEN_ONLY
diff --git a/src/TNL/Containers/Expressions/TypeTraits.h b/src/TNL/Containers/Expressions/TypeTraits.h
index 1024bb465..9a0570345 100644
--- a/src/TNL/Containers/Expressions/TypeTraits.h
+++ b/src/TNL/Containers/Expressions/TypeTraits.h
@@ -103,22 +103,47 @@ struct IsArithmeticSubtype< T, V, false >
 
 
 // helper trait class (used in unit tests)
-template< typename R, bool enabled = ! HasEnabledStaticExpressionTemplates< R >::value >
+template<class T, class R = void>
+struct enable_if_type { typedef R type; };
+
+template< typename R, typename Enable = void >
 struct RemoveExpressionTemplate
 {
    using type = R;
 };
 
 template< typename R >
-struct RemoveExpressionTemplate< R, false >
+struct RemoveExpressionTemplate< R, typename enable_if_type< typename R::VectorOperandType >::type >
 {
-//   using type = StaticVector< R::getSize(), typename RemoveExpressionTemplate< typename R::RealType >::type >;
    using type = typename RemoveExpressionTemplate< typename R::VectorOperandType >::type;
 };
 
 template< typename R >
 using RemoveET = typename RemoveExpressionTemplate< R >::type;
 
+// helper trait selecting the type of the operand data members (op1, op2, operand) of the Static*ExpressionTemplate classes
+template< typename R, typename Enable = void >
+struct OperandMemberType
+{
+   using type = std::conditional_t< std::is_fundamental< R >::value,
+                     // fundamental types are stored by value (as a const non-reference member)
+                     std::add_const_t< std::remove_reference_t< R > >,
+                     // all other types (especially StaticVector) are stored as an lvalue-reference, so the operand must outlive the expression
+                     std::add_lvalue_reference_t< std::add_const_t< R > >
+                  >;
+//   using type = std::add_const_t< std::remove_reference_t< R > >;
+};
+
+// specialization selected when R has a VectorOperandType type member; this assumes that only the StaticBinaryExpressionTemplate and StaticUnaryExpressionTemplate classes have one
+template< typename R >
+struct OperandMemberType< R, typename enable_if_type< typename R::VectorOperandType >::type >
+{
+   // nested expression templates are stored by value (const non-reference member):
+   // subexpressions are temporary objects, and a const-reference member bound to such a
+   // temporary would dangle once the temporary is destroyed (this led to segfaults)
+   using type = std::add_const_t< std::remove_reference_t< R > >;
+};
+
 } // namespace Expressions
 } // namespace Containers
 } // namespace TNL
-- 
GitLab


From 43597c11c81552075b50b755d3dc36a2164de901 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Mon, 6 Jul 2020 16:59:49 +0200
Subject: [PATCH 05/15] Wrapped the result type of vertical operations with
 RemoveET

---
 .../Expressions/VerticalOperations.h          | 49 +++++++++++--------
 1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/src/TNL/Containers/Expressions/VerticalOperations.h b/src/TNL/Containers/Expressions/VerticalOperations.h
index 63b339cb8..6e429414e 100644
--- a/src/TNL/Containers/Expressions/VerticalOperations.h
+++ b/src/TNL/Containers/Expressions/VerticalOperations.h
@@ -14,6 +14,7 @@
 #include <type_traits>
 
 #include <TNL/Algorithms/Reduction.h>
+#include <TNL/Containers/Expressions/TypeTraits.h>
 
 ////
 // By vertical operations we mean those applied across vector elements or
@@ -26,9 +27,10 @@ namespace Expressions {
 ////
 // Vertical operations
 template< typename Expression >
-auto ExpressionMin( const Expression& expression ) -> std::decay_t< decltype( expression[0] ) >
+auto ExpressionMin( const Expression& expression )
+-> RemoveET< std::decay_t< decltype( expression[0] ) > >
 {
-   using ResultType = std::decay_t< decltype( expression[0] ) >;
+   using ResultType = RemoveET< std::decay_t< decltype( expression[0] ) > >;
    using IndexType = typename Expression::IndexType;
 
    const auto view = expression.getConstView();
@@ -39,9 +41,9 @@ auto ExpressionMin( const Expression& expression ) -> std::decay_t< decltype( ex
 
 template< typename Expression >
 auto ExpressionArgMin( const Expression& expression )
--> std::pair< std::decay_t< decltype( expression[0] ) >, typename Expression::IndexType >
+-> RemoveET< std::pair< std::decay_t< decltype( expression[0] ) >, typename Expression::IndexType > >
 {
-   using ResultType = std::decay_t< decltype( expression[0] ) >;
+   using ResultType = RemoveET< std::decay_t< decltype( expression[0] ) > >;
    using IndexType = typename Expression::IndexType;
 
    const auto view = expression.getConstView();
@@ -58,9 +60,10 @@ auto ExpressionArgMin( const Expression& expression )
 }
 
 template< typename Expression >
-auto ExpressionMax( const Expression& expression ) -> std::decay_t< decltype( expression[0] ) >
+auto ExpressionMax( const Expression& expression )
+-> RemoveET< std::decay_t< decltype( expression[0] ) > >
 {
-   using ResultType = std::decay_t< decltype( expression[0] ) >;
+   using ResultType = RemoveET< std::decay_t< decltype( expression[0] ) > >;
    using IndexType = typename Expression::IndexType;
 
    const auto view = expression.getConstView();
@@ -71,9 +74,9 @@ auto ExpressionMax( const Expression& expression ) -> std::decay_t< decltype( ex
 
 template< typename Expression >
 auto ExpressionArgMax( const Expression& expression )
--> std::pair< std::decay_t< decltype( expression[0] ) >, typename Expression::IndexType >
+-> RemoveET< std::pair< std::decay_t< decltype( expression[0] ) >, typename Expression::IndexType > >
 {
-   using ResultType = std::decay_t< decltype( expression[0] ) >;
+   using ResultType = RemoveET< std::decay_t< decltype( expression[0] ) > >;
    using IndexType = typename Expression::IndexType;
 
    const auto view = expression.getConstView();
@@ -90,9 +93,10 @@ auto ExpressionArgMax( const Expression& expression )
 }
 
 template< typename Expression >
-auto ExpressionSum( const Expression& expression ) -> std::decay_t< decltype( expression[0] + expression[0] ) >
+auto ExpressionSum( const Expression& expression )
+-> RemoveET< std::decay_t< decltype( expression[0] + expression[0] ) > >
 {
-   using ResultType = std::decay_t< decltype( expression[0] + expression[0] ) >;
+   using ResultType = RemoveET< std::decay_t< decltype( expression[0] + expression[0] ) > >;
    using IndexType = typename Expression::IndexType;
 
    const auto view = expression.getConstView();
@@ -101,9 +105,10 @@ auto ExpressionSum( const Expression& expression ) -> std::decay_t< decltype( ex
 }
 
 template< typename Expression >
-auto ExpressionProduct( const Expression& expression ) -> std::decay_t< decltype( expression[0] * expression[0] ) >
+auto ExpressionProduct( const Expression& expression )
+-> RemoveET< std::decay_t< decltype( expression[0] * expression[0] ) > >
 {
-   using ResultType = std::decay_t< decltype( expression[0] * expression[0] ) >;
+   using ResultType = RemoveET< std::decay_t< decltype( expression[0] * expression[0] ) > >;
    using IndexType = typename Expression::IndexType;
 
    const auto view = expression.getConstView();
@@ -112,9 +117,10 @@ auto ExpressionProduct( const Expression& expression ) -> std::decay_t< decltype
 }
 
 template< typename Expression >
-auto ExpressionLogicalAnd( const Expression& expression ) -> std::decay_t< decltype( expression[0] && expression[0] ) >
+auto ExpressionLogicalAnd( const Expression& expression )
+-> RemoveET< std::decay_t< decltype( expression[0] && expression[0] ) > >
 {
-   using ResultType = std::decay_t< decltype( expression[0] && expression[0] ) >;
+   using ResultType = RemoveET< std::decay_t< decltype( expression[0] && expression[0] ) > >;
    using IndexType = typename Expression::IndexType;
 
    const auto view = expression.getConstView();
@@ -123,9 +129,10 @@ auto ExpressionLogicalAnd( const Expression& expression ) -> std::decay_t< declt
 }
 
 template< typename Expression >
-auto ExpressionLogicalOr( const Expression& expression ) -> std::decay_t< decltype( expression[0] || expression[0] ) >
+auto ExpressionLogicalOr( const Expression& expression )
+-> RemoveET< std::decay_t< decltype( expression[0] || expression[0] ) > >
 {
-   using ResultType = std::decay_t< decltype( expression[0] || expression[0] ) >;
+   using ResultType = RemoveET< std::decay_t< decltype( expression[0] || expression[0] ) > >;
    using IndexType = typename Expression::IndexType;
 
    const auto view = expression.getConstView();
@@ -134,9 +141,10 @@ auto ExpressionLogicalOr( const Expression& expression ) -> std::decay_t< declty
 }
 
 template< typename Expression >
-auto ExpressionBinaryAnd( const Expression& expression ) -> std::decay_t< decltype( expression[0] & expression[0] ) >
+auto ExpressionBinaryAnd( const Expression& expression )
+-> RemoveET< std::decay_t< decltype( expression[0] & expression[0] ) > >
 {
-   using ResultType = std::decay_t< decltype( expression[0] & expression[0] ) >;
+   using ResultType = RemoveET< std::decay_t< decltype( expression[0] & expression[0] ) > >;
    using IndexType = typename Expression::IndexType;
 
    const auto view = expression.getConstView();
@@ -145,9 +153,10 @@ auto ExpressionBinaryAnd( const Expression& expression ) -> std::decay_t< declty
 }
 
 template< typename Expression >
-auto ExpressionBinaryOr( const Expression& expression ) -> std::decay_t< decltype( expression[0] | expression[0] ) >
+auto ExpressionBinaryOr( const Expression& expression )
+-> RemoveET< std::decay_t< decltype( expression[0] | expression[0] ) > >
 {
-   using ResultType = std::decay_t< decltype( expression[0] | expression[0] ) >;
+   using ResultType = RemoveET< std::decay_t< decltype( expression[0] | expression[0] ) > >;
    using IndexType = typename Expression::IndexType;
 
    const auto view = expression.getConstView();
-- 
GitLab


From 2df931ad3e5c30e7442b8cf53e8f18233e6fb6fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Tue, 7 Jul 2020 13:58:02 +0200
Subject: [PATCH 06/15] Improved type traits to work even with reference types

---
 .../Expressions/ExpressionVariableType.h      |  2 +-
 src/TNL/Containers/Expressions/TypeTraits.h   | 46 +++++++++++--------
 src/TNL/TypeTraits.h                          | 16 +++----
 3 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/src/TNL/Containers/Expressions/ExpressionVariableType.h b/src/TNL/Containers/Expressions/ExpressionVariableType.h
index 2095a4352..10acb65cc 100644
--- a/src/TNL/Containers/Expressions/ExpressionVariableType.h
+++ b/src/TNL/Containers/Expressions/ExpressionVariableType.h
@@ -22,7 +22,7 @@ template< typename T, typename V = T >
 constexpr ExpressionVariableType
 getExpressionVariableType()
 {
-   if( std::is_arithmetic< std::decay_t< T > >::value )
+   if( std::is_arithmetic< T >::value )
       return ArithmeticVariable;
    // vectors must be considered as an arithmetic type when used as RealType in another vector
    if( IsArithmeticSubtype< T, V >::value )
diff --git a/src/TNL/Containers/Expressions/TypeTraits.h b/src/TNL/Containers/Expressions/TypeTraits.h
index 9a0570345..5df97785e 100644
--- a/src/TNL/Containers/Expressions/TypeTraits.h
+++ b/src/TNL/Containers/Expressions/TypeTraits.h
@@ -33,47 +33,55 @@ struct HasEnabledDistributedExpressionTemplates : std::false_type
 // type aliases for enabling specific operators and functions using SFINAE
 template< typename ET1 >
 using EnableIfStaticUnaryExpression_t = std::enable_if_t<
-      HasEnabledStaticExpressionTemplates< ET1 >::value >;
+      HasEnabledStaticExpressionTemplates< std::decay_t< ET1 > >::value >;  // trait is queried on the decayed type, so ET1 may be a reference or cv-qualified
 
 template< typename ET1, typename ET2 >
 using EnableIfStaticBinaryExpression_t = std::enable_if_t<
-      HasEnabledStaticExpressionTemplates< ET1 >::value ||
-      HasEnabledStaticExpressionTemplates< ET2 >::value >;
+      (  // at least one decayed operand must satisfy HasEnabledStaticExpressionTemplates ...
+         HasEnabledStaticExpressionTemplates< std::decay_t< ET1 > >::value ||
+         HasEnabledStaticExpressionTemplates< std::decay_t< ET2 > >::value
+      ) && !
+      (  // ... and neither decayed operand may satisfy HasEnabledExpressionTemplates or HasEnabledDistributedExpressionTemplates
+         HasEnabledExpressionTemplates< std::decay_t< ET2 > >::value ||
+         HasEnabledExpressionTemplates< std::decay_t< ET1 > >::value ||
+         HasEnabledDistributedExpressionTemplates< std::decay_t< ET2 > >::value ||
+         HasEnabledDistributedExpressionTemplates< std::decay_t< ET1 > >::value
+      ) >;
 
 template< typename ET1 >
 using EnableIfUnaryExpression_t = std::enable_if_t<
-      HasEnabledExpressionTemplates< ET1 >::value >;
+      HasEnabledExpressionTemplates< std::decay_t< ET1 > >::value >;  // reference and cv-qualifiers of ET1 are stripped before the trait lookup
 
 template< typename ET1, typename ET2 >
 using EnableIfBinaryExpression_t = std::enable_if_t<
       // we need to avoid ambiguity with operators defined in Array (e.g. Array::operator==)
       // so the first operand must not be Array
       (
-         HasAddAssignmentOperator< ET1 >::value ||
-         HasEnabledExpressionTemplates< ET1 >::value ||
-         std::is_arithmetic< ET1 >::value
+         HasAddAssignmentOperator< std::decay_t< ET1 > >::value ||
+         HasEnabledExpressionTemplates< std::decay_t< ET1 > >::value ||
+         std::is_arithmetic< std::decay_t< ET1 > >::value  // decayed, so e.g. const double& counts as arithmetic
       ) &&
       (
-         HasEnabledExpressionTemplates< ET2 >::value ||
-         HasEnabledExpressionTemplates< ET1 >::value
+         HasEnabledExpressionTemplates< std::decay_t< ET2 > >::value ||  // at least one decayed operand must satisfy HasEnabledExpressionTemplates
+         HasEnabledExpressionTemplates< std::decay_t< ET1 > >::value
       ) >;
 
 template< typename ET1 >
 using EnableIfDistributedUnaryExpression_t = std::enable_if_t<
-      HasEnabledDistributedExpressionTemplates< ET1 >::value >;
+      HasEnabledDistributedExpressionTemplates< std::decay_t< ET1 > >::value >;  // checked on the decayed type, so ET1 may be deduced as a reference
 
 template< typename ET1, typename ET2 >
 using EnableIfDistributedBinaryExpression_t = std::enable_if_t<
       // we need to avoid ambiguity with operators defined in Array (e.g. Array::operator==)
       // so the first operand must not be Array
       (
-         HasAddAssignmentOperator< ET1 >::value ||
-         HasEnabledDistributedExpressionTemplates< ET1 >::value ||
-         std::is_arithmetic< ET1 >::value
+         HasAddAssignmentOperator< std::decay_t< ET1 > >::value ||
+         HasEnabledDistributedExpressionTemplates< std::decay_t< ET1 > >::value ||
+         std::is_arithmetic< std::decay_t< ET1 > >::value  // decayed, so a reference to an arithmetic type is accepted as well
       ) &&
       (
-         HasEnabledDistributedExpressionTemplates< ET2 >::value ||
-         HasEnabledDistributedExpressionTemplates< ET1 >::value
+         HasEnabledDistributedExpressionTemplates< std::decay_t< ET2 > >::value ||  // at least one decayed operand must satisfy HasEnabledDistributedExpressionTemplates
+         HasEnabledDistributedExpressionTemplates< std::decay_t< ET1 > >::value
       ) >;
 
 
@@ -83,7 +91,7 @@ template< typename T, typename V,
 struct IsArithmeticSubtype
 : public std::integral_constant< bool,
             // TODO: use std::is_assignable?
-            std::is_same< T, typename V::RealType >::value >
+            std::is_same< T, typename std::decay_t< V >::RealType >::value >
 {};
 
 template< typename T >
@@ -109,13 +117,13 @@ struct enable_if_type { typedef R type; };
 template< typename R, typename Enable = void >
 struct RemoveExpressionTemplate
 {
-   using type = R;
+   using type = std::decay_t< R >;  // primary template: nothing is unwrapped, only reference/cv-qualifiers are stripped from R
 };
 
 template< typename R >
-struct RemoveExpressionTemplate< R, typename enable_if_type< typename R::VectorOperandType >::type >
+struct RemoveExpressionTemplate< R, typename enable_if_type< typename std::decay_t< R >::VectorOperandType >::type >  // participates only if the decayed R declares a nested VectorOperandType
 {
-   using type = typename RemoveExpressionTemplate< typename R::VectorOperandType >::type;
+   using type = typename RemoveExpressionTemplate< typename std::decay_t< R >::VectorOperandType >::type;  // recurse on the nested operand type
 };
 
 template< typename R >
diff --git a/src/TNL/TypeTraits.h b/src/TNL/TypeTraits.h
index 3032a0b0d..2afda7aad 100644
--- a/src/TNL/TypeTraits.h
+++ b/src/TNL/TypeTraits.h
@@ -29,7 +29,7 @@ private:
     template< typename C > static NoType& test(...);
 
 public:
-    static constexpr bool value = ( sizeof( test< T >(0) ) == sizeof( YesType ) );
+    static constexpr bool value = ( sizeof( test< std::decay_t<T> >(0) ) == sizeof( YesType ) );
 };
 
 /**
@@ -46,7 +46,7 @@ private:
     template< typename C > static NoType& test(...);
 
 public:
-    static constexpr bool value = ( sizeof( test< T >(0) ) == sizeof( YesType ) );
+    static constexpr bool value = ( sizeof( test< std::decay_t<T> >(0) ) == sizeof( YesType ) );
 };
 
 /**
@@ -70,7 +70,7 @@ private:
    template< typename >
    static constexpr std::false_type check(...);
 
-   using type = decltype(check<T>(0));
+   using type = decltype(check<std::decay_t<T>>(0));
 
 public:
     static constexpr bool value = type::value;
@@ -97,7 +97,7 @@ private:
    template< typename >
    static constexpr std::false_type check(...);
 
-   using type = decltype(check<T>(0));
+   using type = decltype(check<std::decay_t<T>>(0));
 
 public:
     static constexpr bool value = type::value;
@@ -124,7 +124,7 @@ private:
    template< typename >
    static constexpr std::false_type check(...);
 
-   using type = decltype(check<T>(0));
+   using type = decltype(check<std::decay_t<T>>(0));
 
 public:
     static constexpr bool value = type::value;
@@ -188,7 +188,7 @@ private:
       template< typename M, M method >
       static constexpr std::false_type is_constexpr_impl(...);
 
-      using type = decltype(is_constexpr_impl< decltype(&T::getSize), &T::getSize >(0));
+      using type = decltype(is_constexpr_impl< decltype(&std::decay_t<T>::getSize), &std::decay_t<T>::getSize >(0));
    };
 
    // specialization for types which don't have getSize() method at all
@@ -223,7 +223,7 @@ struct IsStaticArrayType
 template< typename T >
 struct IsViewType
 : public std::integral_constant< bool,
-            std::is_same< typename T::ViewType, T >::value >
+            std::is_same< typename std::decay_t<T>::ViewType, std::decay_t<T> >::value >  // decay both sides so that T&, const T etc. give the same answer as T
 {};
 
 /**
@@ -247,7 +247,7 @@ private:
    template< typename >
    static constexpr std::false_type check(...);
 
-   using type = decltype(check<T>(0));
+   using type = decltype(check<std::decay_t<T>>(0));
 
 public:
     static constexpr bool value = type::value;
-- 
GitLab


From ee68bdc95593807f40ca269535f17baa0efaba98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Wed, 8 Jul 2020 15:45:43 +0200
Subject: [PATCH 07/15] Added static asserts for
 std::numeric_limits<ResultType>::is_specialized to vector reductions

---
 .../Expressions/DistributedVerticalOperations.h      | 12 ++++++++++++
 src/TNL/Containers/Expressions/VerticalOperations.h  | 12 ++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/src/TNL/Containers/Expressions/DistributedVerticalOperations.h b/src/TNL/Containers/Expressions/DistributedVerticalOperations.h
index e92391a7d..43390b529 100644
--- a/src/TNL/Containers/Expressions/DistributedVerticalOperations.h
+++ b/src/TNL/Containers/Expressions/DistributedVerticalOperations.h
@@ -25,6 +25,8 @@ auto DistributedExpressionMin( const Expression& expression ) -> std::decay_t< d
    using ResultType = std::decay_t< decltype( expression[0] ) >;
    using CommunicatorType = typename Expression::CommunicatorType;
 
+   static_assert( std::numeric_limits< ResultType >::is_specialized,
+                  "std::numeric_limits is not specialized for the reduction's result type" );
    ResultType result = std::numeric_limits< ResultType >::max();
    if( expression.getCommunicationGroup() != CommunicatorType::NullGroup ) {
       const ResultType localResult = ExpressionMin( expression.getConstLocalView() );
@@ -42,6 +44,8 @@ auto DistributedExpressionArgMin( const Expression& expression )
    using ResultType = std::pair< RealType, IndexType >;
    using CommunicatorType = typename Expression::CommunicatorType;
 
+   static_assert( std::numeric_limits< RealType >::is_specialized,
+                  "std::numeric_limits is not specialized for the reduction's real type" );
    ResultType result( -1, std::numeric_limits< RealType >::max() );
    const auto group = expression.getCommunicationGroup();
    if( group != CommunicatorType::NullGroup ) {
@@ -82,6 +86,8 @@ auto DistributedExpressionMax( const Expression& expression ) -> std::decay_t< d
    using ResultType = std::decay_t< decltype( expression[0] ) >;
    using CommunicatorType = typename Expression::CommunicatorType;
 
+   static_assert( std::numeric_limits< ResultType >::is_specialized,
+                  "std::numeric_limits is not specialized for the reduction's result type" );
    ResultType result = std::numeric_limits< ResultType >::lowest();
    if( expression.getCommunicationGroup() != CommunicatorType::NullGroup ) {
       const ResultType localResult = ExpressionMax( expression.getConstLocalView() );
@@ -99,6 +105,8 @@ auto DistributedExpressionArgMax( const Expression& expression )
    using ResultType = std::pair< RealType, IndexType >;
    using CommunicatorType = typename Expression::CommunicatorType;
 
+   static_assert( std::numeric_limits< RealType >::is_specialized,
+                  "std::numeric_limits is not specialized for the reduction's real type" );
    ResultType result( -1, std::numeric_limits< RealType >::lowest() );
    const auto group = expression.getCommunicationGroup();
    if( group != CommunicatorType::NullGroup ) {
@@ -168,6 +176,8 @@ auto DistributedExpressionLogicalAnd( const Expression& expression ) -> std::dec
    using ResultType = std::decay_t< decltype( expression[0] && expression[0] ) >;
    using CommunicatorType = typename Expression::CommunicatorType;
 
+   static_assert( std::numeric_limits< ResultType >::is_specialized,
+                  "std::numeric_limits is not specialized for the reduction's result type" );
    ResultType result = std::numeric_limits< ResultType >::max();
    if( expression.getCommunicationGroup() != CommunicatorType::NullGroup ) {
       const ResultType localResult = ExpressionLogicalAnd( expression.getConstLocalView() );
@@ -196,6 +206,8 @@ auto DistributedExpressionBinaryAnd( const Expression& expression ) -> std::deca
    using ResultType = std::decay_t< decltype( expression[0] & expression[0] ) >;
    using CommunicatorType = typename Expression::CommunicatorType;
 
+   static_assert( std::numeric_limits< ResultType >::is_specialized,
+                  "std::numeric_limits is not specialized for the reduction's result type" );
    ResultType result = std::numeric_limits< ResultType >::max();
    if( expression.getCommunicationGroup() != CommunicatorType::NullGroup ) {
       const ResultType localResult = ExpressionLogicalBinaryAnd( expression.getConstLocalView() );
diff --git a/src/TNL/Containers/Expressions/VerticalOperations.h b/src/TNL/Containers/Expressions/VerticalOperations.h
index 6e429414e..68360495c 100644
--- a/src/TNL/Containers/Expressions/VerticalOperations.h
+++ b/src/TNL/Containers/Expressions/VerticalOperations.h
@@ -36,6 +36,8 @@ auto ExpressionMin( const Expression& expression )
    const auto view = expression.getConstView();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; };
    auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); };
+   static_assert( std::numeric_limits< ResultType >::is_specialized,
+                  "std::numeric_limits is not specialized for the reduction's result type" );
    return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
 }
 
@@ -56,6 +58,8 @@ auto ExpressionArgMin( const Expression& expression )
       else if( a == b && bIdx < aIdx )
          aIdx = bIdx;
    };
+   static_assert( std::numeric_limits< ResultType >::is_specialized,
+                  "std::numeric_limits is not specialized for the reduction's result type" );
    return Algorithms::Reduction< typename Expression::DeviceType >::reduceWithArgument( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
 }
 
@@ -69,6 +73,8 @@ auto ExpressionMax( const Expression& expression )
    const auto view = expression.getConstView();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; };
    auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); };
+   static_assert( std::numeric_limits< ResultType >::is_specialized,
+                  "std::numeric_limits is not specialized for the reduction's result type" );
    return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
 }
 
@@ -89,6 +95,8 @@ auto ExpressionArgMax( const Expression& expression )
       else if( a == b && bIdx < aIdx )
          aIdx = bIdx;
    };
+   static_assert( std::numeric_limits< ResultType >::is_specialized,
+                  "std::numeric_limits is not specialized for the reduction's result type" );
    return Algorithms::Reduction< typename Expression::DeviceType >::reduceWithArgument( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
 }
 
@@ -125,6 +133,8 @@ auto ExpressionLogicalAnd( const Expression& expression )
 
    const auto view = expression.getConstView();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; };
+   static_assert( std::numeric_limits< ResultType >::is_specialized,
+                  "std::numeric_limits is not specialized for the reduction's result type" );
    return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), std::logical_and<>{}, fetch, std::numeric_limits< ResultType >::max() );
 }
 
@@ -149,6 +159,8 @@ auto ExpressionBinaryAnd( const Expression& expression )
 
    const auto view = expression.getConstView();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; };
+   static_assert( std::numeric_limits< ResultType >::is_specialized,
+                  "std::numeric_limits is not specialized for the reduction's result type" );
    return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), std::bit_and<>{}, fetch, std::numeric_limits< ResultType >::max() );
 }
 
-- 
GitLab


From c09870c5c37d00fc732558cabd1dcd06a55748a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Mon, 6 Jul 2020 17:02:50 +0200
Subject: [PATCH 08/15] Fixed tests for expression templates to work with
 nested vector types

Tests for vertical operations which are not well defined for nested
vectors are disabled.
---
 src/UnitTests/Containers/CMakeLists.txt       | 23 +++++------
 .../Containers/VectorBinaryOperationsTest.cpp |  1 +
 .../Containers/VectorBinaryOperationsTest.cu  |  1 +
 .../Containers/VectorBinaryOperationsTest.h   |  8 +---
 .../Containers/VectorHelperFunctions.h        | 32 +++++++++++++++
 .../Containers/VectorUnaryOperationsTest.cpp  |  1 +
 .../Containers/VectorUnaryOperationsTest.cu   |  1 +
 .../Containers/VectorUnaryOperationsTest.h    | 37 +++++++++--------
 .../VectorVerticalOperationsTest.cpp          |  1 +
 .../VectorVerticalOperationsTest.cu           |  1 +
 .../Containers/VectorVerticalOperationsTest.h | 40 +++++++++++++------
 11 files changed, 98 insertions(+), 48 deletions(-)

diff --git a/src/UnitTests/Containers/CMakeLists.txt b/src/UnitTests/Containers/CMakeLists.txt
index 227a86551..21be3ded1 100644
--- a/src/UnitTests/Containers/CMakeLists.txt
+++ b/src/UnitTests/Containers/CMakeLists.txt
@@ -32,10 +32,9 @@ ADD_EXECUTABLE( VectorVerticalOperationsTest VectorVerticalOperationsTest.cpp )
 TARGET_COMPILE_OPTIONS( VectorVerticalOperationsTest PRIVATE ${CXX_TESTS_FLAGS} )
 TARGET_LINK_LIBRARIES( VectorVerticalOperationsTest ${GTEST_BOTH_LIBRARIES} )
 
-# FIXME
-#ADD_EXECUTABLE( VectorOfStaticVectorsTest VectorOfStaticVectorsTest.cpp )
-#TARGET_COMPILE_OPTIONS( VectorOfStaticVectorsTest PRIVATE ${CXX_TESTS_FLAGS} )
-#TARGET_LINK_LIBRARIES( VectorOfStaticVectorsTest ${GTEST_BOTH_LIBRARIES} )
+ADD_EXECUTABLE( VectorOfStaticVectorsTest VectorOfStaticVectorsTest.cpp )
+TARGET_COMPILE_OPTIONS( VectorOfStaticVectorsTest PRIVATE ${CXX_TESTS_FLAGS} )
+TARGET_LINK_LIBRARIES( VectorOfStaticVectorsTest ${GTEST_BOTH_LIBRARIES} )
 
 IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( ArrayTestCuda ArrayTest.cu
@@ -62,9 +61,8 @@ IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( VectorVerticalOperationsTestCuda VectorVerticalOperationsTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( VectorVerticalOperationsTestCuda ${GTEST_BOTH_LIBRARIES} )
 
-   # FIXME
-#   CUDA_ADD_EXECUTABLE( VectorOfStaticVectorsTestCuda VectorOfStaticVectorsTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
-#   TARGET_LINK_LIBRARIES( VectorOfStaticVectorsTestCuda ${GTEST_BOTH_LIBRARIES} )
+   CUDA_ADD_EXECUTABLE( VectorOfStaticVectorsTestCuda VectorOfStaticVectorsTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( VectorOfStaticVectorsTestCuda ${GTEST_BOTH_LIBRARIES} )
 ENDIF( BUILD_CUDA )
 
 ADD_EXECUTABLE( StaticArrayTest StaticArrayTest.cpp )
@@ -79,10 +77,9 @@ ADD_EXECUTABLE( StaticVectorOperationsTest StaticVectorOperationsTest.cpp )
 TARGET_COMPILE_OPTIONS( StaticVectorOperationsTest PRIVATE ${CXX_TESTS_FLAGS} )
 TARGET_LINK_LIBRARIES( StaticVectorOperationsTest ${GTEST_BOTH_LIBRARIES} )
 
-# FIXME
-#ADD_EXECUTABLE( StaticVectorOfStaticVectorsTest StaticVectorOfStaticVectorsTest.cpp )
-#TARGET_COMPILE_OPTIONS( StaticVectorOfStaticVectorsTest PRIVATE ${CXX_TESTS_FLAGS} )
-#TARGET_LINK_LIBRARIES( StaticVectorOfStaticVectorsTest ${GTEST_BOTH_LIBRARIES} )
+ADD_EXECUTABLE( StaticVectorOfStaticVectorsTest StaticVectorOfStaticVectorsTest.cpp )
+TARGET_COMPILE_OPTIONS( StaticVectorOfStaticVectorsTest PRIVATE ${CXX_TESTS_FLAGS} )
+TARGET_LINK_LIBRARIES( StaticVectorOfStaticVectorsTest ${GTEST_BOTH_LIBRARIES} )
 
 
 ADD_TEST( ArrayTest ${EXECUTABLE_OUTPUT_PATH}/ArrayTest${CMAKE_EXECUTABLE_SUFFIX} )
@@ -93,7 +90,7 @@ ADD_TEST( VectorEvaluateAndReduceTest ${EXECUTABLE_OUTPUT_PATH}/VectorEvaluateAn
 ADD_TEST( VectorBinaryOperationsTest ${EXECUTABLE_OUTPUT_PATH}/VectorBinaryOperationsTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( VectorUnaryOperationsTest ${EXECUTABLE_OUTPUT_PATH}/VectorUnaryOperationsTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( VectorVerticalOperationsTest ${EXECUTABLE_OUTPUT_PATH}/VectorVerticalOperationsTest${CMAKE_EXECUTABLE_SUFFIX} )
-#ADD_TEST( VectorOfStaticVectorsTest ${EXECUTABLE_OUTPUT_PATH}/VectorOfStaticVectorsTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( VectorOfStaticVectorsTest ${EXECUTABLE_OUTPUT_PATH}/VectorOfStaticVectorsTest${CMAKE_EXECUTABLE_SUFFIX} )
 IF( BUILD_CUDA )
    ADD_TEST( ArrayTestCuda ${EXECUTABLE_OUTPUT_PATH}/ArrayTestCuda${CMAKE_EXECUTABLE_SUFFIX} )
    ADD_TEST( ArrayViewTestCuda ${EXECUTABLE_OUTPUT_PATH}/ArrayViewTestCuda${CMAKE_EXECUTABLE_SUFFIX} )
@@ -108,7 +105,7 @@ ENDIF()
 ADD_TEST( StaticArrayTest ${EXECUTABLE_OUTPUT_PATH}/StaticArrayTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( StaticVectorTest ${EXECUTABLE_OUTPUT_PATH}/StaticVectorTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( StaticVectorOperationsTest ${EXECUTABLE_OUTPUT_PATH}/StaticVectorOperationsTest${CMAKE_EXECUTABLE_SUFFIX} )
-#ADD_TEST( StaticVectorOfStaticVectorsTest ${EXECUTABLE_OUTPUT_PATH}/StaticVectorOfStaticVectorsTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( StaticVectorOfStaticVectorsTest ${EXECUTABLE_OUTPUT_PATH}/StaticVectorOfStaticVectorsTest${CMAKE_EXECUTABLE_SUFFIX} )
 
 
 ADD_SUBDIRECTORY( Multimaps )
diff --git a/src/UnitTests/Containers/VectorBinaryOperationsTest.cpp b/src/UnitTests/Containers/VectorBinaryOperationsTest.cpp
index 717f56e90..30b01f108 100644
--- a/src/UnitTests/Containers/VectorBinaryOperationsTest.cpp
+++ b/src/UnitTests/Containers/VectorBinaryOperationsTest.cpp
@@ -1 +1,2 @@
 #include "VectorBinaryOperationsTest.h"
+#include "../main.h"
diff --git a/src/UnitTests/Containers/VectorBinaryOperationsTest.cu b/src/UnitTests/Containers/VectorBinaryOperationsTest.cu
index 717f56e90..30b01f108 100644
--- a/src/UnitTests/Containers/VectorBinaryOperationsTest.cu
+++ b/src/UnitTests/Containers/VectorBinaryOperationsTest.cu
@@ -1 +1,2 @@
 #include "VectorBinaryOperationsTest.h"
+#include "../main.h"
diff --git a/src/UnitTests/Containers/VectorBinaryOperationsTest.h b/src/UnitTests/Containers/VectorBinaryOperationsTest.h
index 46833c235..818d45e32 100644
--- a/src/UnitTests/Containers/VectorBinaryOperationsTest.h
+++ b/src/UnitTests/Containers/VectorBinaryOperationsTest.h
@@ -627,8 +627,8 @@ TYPED_TEST( VectorBinaryOperationsTest, comparisonOnDifferentDevices )
 {
    SETUP_BINARY_TEST_ALIASES;
 
-   using RightHostVector = typename TestFixture::RightVector::Self< typename TestFixture::RightVector::RealType, Devices::Sequential >;
-   using RightHost = typename TestFixture::Right::Self< typename TestFixture::Right::RealType, Devices::Sequential >;
+   using RightHostVector = typename TestFixture::RightVector::template Self< typename TestFixture::RightVector::RealType, Devices::Sequential >;
+   using RightHost = typename TestFixture::Right::template Self< typename TestFixture::Right::RealType, Devices::Sequential >;
 
    RightHostVector _R1_h; _R1_h = this->_R1;
    RightHost R1_h( _R1_h );
@@ -646,7 +646,3 @@ TYPED_TEST( VectorBinaryOperationsTest, comparisonOnDifferentDevices )
 } // namespace binary_tests
 
 #endif // HAVE_GTEST
-
-#if !defined(DISTRIBUTED_VECTOR) && !defined(STATIC_VECTOR)
-#include "../main.h"
-#endif
diff --git a/src/UnitTests/Containers/VectorHelperFunctions.h b/src/UnitTests/Containers/VectorHelperFunctions.h
index 4e8c64fae..649de1cee 100644
--- a/src/UnitTests/Containers/VectorHelperFunctions.h
+++ b/src/UnitTests/Containers/VectorHelperFunctions.h
@@ -112,3 +112,35 @@ void bindOrAssign( V1& v1, V2& v2 )
 {
    v1 = v2;
 }
+
+
+#ifdef HAVE_GTEST
+#include "gtest/gtest.h"
+// Scalar vs. scalar (neither type satisfies TNL::HasSubscriptOperator): forwards directly to EXPECT_NEAR.
+template< typename T1, typename T2,
+          std::enable_if_t< ! TNL::HasSubscriptOperator< T1 >::value &&
+                            ! TNL::HasSubscriptOperator< T2 >::value, bool > = true >
+void expect_near( const T1& arg, const T2& expected, double epsilon )
+{
+   EXPECT_NEAR( arg, expected, epsilon );
+}
+// Container vs. scalar (only T1 satisfies TNL::HasSubscriptOperator): each arg[ i ] is compared with the same expected value.
+template< typename T1, typename T2,
+          std::enable_if_t< TNL::HasSubscriptOperator< T1 >::value &&
+                            ! TNL::HasSubscriptOperator< T2 >::value, bool > = true >
+void expect_near( const T1& arg, const T2& expected, double epsilon )
+{
+   for( int i = 0; i < arg.getSize(); i++ )
+      expect_near( arg[ i ], expected, epsilon );  // overload resolution picks the matching expect_near for the element type
+}
+// Container vs. container (both types satisfy TNL::HasSubscriptOperator): sizes must match, then elements are compared pairwise.
+template< typename T1, typename T2,
+          std::enable_if_t< TNL::HasSubscriptOperator< T1 >::value &&
+                            TNL::HasSubscriptOperator< T2 >::value, bool > = true >
+void expect_near( const T1& arg, const T2& expected, double epsilon )
+{
+   ASSERT_EQ( arg.getSize(), expected.getSize() );  // fatal assertion: on mismatch this helper returns before indexing
+   for( int i = 0; i < arg.getSize(); i++ )
+      expect_near( arg[ i ], expected[ i ], epsilon );
+}
+#endif
diff --git a/src/UnitTests/Containers/VectorUnaryOperationsTest.cpp b/src/UnitTests/Containers/VectorUnaryOperationsTest.cpp
index 1b9bcdc6c..1638e7ce7 100644
--- a/src/UnitTests/Containers/VectorUnaryOperationsTest.cpp
+++ b/src/UnitTests/Containers/VectorUnaryOperationsTest.cpp
@@ -1 +1,2 @@
 #include "VectorUnaryOperationsTest.h"
+#include "../main.h"
diff --git a/src/UnitTests/Containers/VectorUnaryOperationsTest.cu b/src/UnitTests/Containers/VectorUnaryOperationsTest.cu
index 1b9bcdc6c..1638e7ce7 100644
--- a/src/UnitTests/Containers/VectorUnaryOperationsTest.cu
+++ b/src/UnitTests/Containers/VectorUnaryOperationsTest.cu
@@ -1 +1,2 @@
 #include "VectorUnaryOperationsTest.h"
+#include "../main.h"
diff --git a/src/UnitTests/Containers/VectorUnaryOperationsTest.h b/src/UnitTests/Containers/VectorUnaryOperationsTest.h
index 9baea01c5..a5beb58d9 100644
--- a/src/UnitTests/Containers/VectorUnaryOperationsTest.h
+++ b/src/UnitTests/Containers/VectorUnaryOperationsTest.h
@@ -139,6 +139,10 @@ protected:
 
 TYPED_TEST_SUITE( VectorUnaryOperationsTest, VectorTypes );
 
+// Declares the alias ExpectedVector = TestFixture::Vector< R >, where R is the type of function( VectorOrView::RealType{} ) passed through Expressions::RemoveET; VectorOrView must already be declared at the expansion site.
+#define EXPECTED_VECTOR( TestFixture, function ) \
+   using ExpectedVector = typename TestFixture::template Vector< Expressions::RemoveET< decltype(function(typename VectorOrView::RealType{})) > >;
+
 #ifdef STATIC_VECTOR
    #define SETUP_UNARY_VECTOR_TEST( _ ) \
       using VectorOrView = typename TestFixture::VectorOrView; \
@@ -151,8 +155,8 @@ TYPED_TEST_SUITE( VectorUnaryOperationsTest, VectorTypes );
    #define SETUP_UNARY_VECTOR_TEST_FUNCTION( _, begin, end, function ) \
       using VectorOrView = typename TestFixture::VectorOrView; \
       using RealType = typename VectorOrView::RealType;        \
-      using ExpectedVector = typename TestFixture::template Vector< decltype(function(RealType{})) >; \
-      constexpr int _size = VectorOrView::getSize();            \
+      EXPECTED_VECTOR( TestFixture, function );                \
+      constexpr int _size = VectorOrView::getSize();           \
                                                                \
       VectorOrView V1;                                         \
       ExpectedVector expected;                                 \
@@ -187,9 +191,9 @@ TYPED_TEST_SUITE( VectorUnaryOperationsTest, VectorTypes );
       using VectorType = typename TestFixture::VectorType;     \
       using VectorOrView = typename TestFixture::VectorOrView; \
       using RealType = typename VectorType::RealType;          \
-      using ExpectedVector = typename TestFixture::template Vector< decltype(function(RealType{})) >; \
+      EXPECTED_VECTOR( TestFixture, function );                \
       using HostVector = typename VectorType::template Self< RealType, Devices::Host >; \
-      using HostExpectedVector = typename ExpectedVector::template Self< decltype(function(RealType{})), Devices::Host >; \
+      using HostExpectedVector = typename ExpectedVector::template Self< typename ExpectedVector::RealType, Devices::Host >; \
       using CommunicatorType = typename VectorOrView::CommunicatorType; \
       const auto group = CommunicatorType::AllGroup; \
       using LocalRangeType = typename VectorOrView::LocalRangeType; \
@@ -228,9 +232,9 @@ TYPED_TEST_SUITE( VectorUnaryOperationsTest, VectorTypes );
       using VectorType = typename TestFixture::VectorType;     \
       using VectorOrView = typename TestFixture::VectorOrView; \
       using RealType = typename VectorType::RealType;          \
-      using ExpectedVector = typename TestFixture::template Vector< decltype(function(RealType{})) >; \
+      EXPECTED_VECTOR( TestFixture, function );                \
       using HostVector = typename VectorType::template Self< RealType, Devices::Host >; \
-      using HostExpectedVector = typename ExpectedVector::template Self< decltype(function(RealType{})), Devices::Host >; \
+      using HostExpectedVector = typename ExpectedVector::template Self< typename ExpectedVector::RealType, Devices::Host >; \
                                                                \
       HostVector _V1h( size );                                 \
       HostExpectedVector expected_h( size );                   \
@@ -261,10 +265,10 @@ void expect_vectors_near( const Left& _v1, const Right& _v2 )
    ASSERT_EQ( _v1.getSize(), _v2.getSize() );
 #ifdef STATIC_VECTOR
    for( int i = 0; i < _v1.getSize(); i++ )
-      EXPECT_NEAR( _v1[i], _v2[i], 1e-6 ) << "i = " << i;
+      expect_near( _v1[i], _v2[i], 1e-6 );
 #else
-   using LeftNonConstReal = std::remove_const_t< typename Left::RealType >;
-   using RightNonConstReal = std::remove_const_t< typename Right::RealType >;
+   using LeftNonConstReal = Expressions::RemoveET< std::remove_const_t< typename Left::RealType > >;
+   using RightNonConstReal = Expressions::RemoveET< std::remove_const_t< typename Right::RealType > >;
 #ifdef DISTRIBUTED_VECTOR
    using CommunicatorType = typename Left::CommunicatorType;
    static_assert( std::is_same< typename Right::CommunicatorType, CommunicatorType >::value,
@@ -290,7 +294,7 @@ void expect_vectors_near( const Left& _v1, const Right& _v2 )
 #else
    for( int i = 0; i < v1.getSize(); i++ )
 #endif
-      EXPECT_NEAR( v1_h[i], v2_h[i], 1e-6 ) << "i = " << i;
+      expect_near( v1_h[i], v2_h[i], 1e-6 );
 #endif
 }
 
@@ -465,8 +469,8 @@ TYPED_TEST( VectorUnaryOperationsTest, atanh )
 TYPED_TEST( VectorUnaryOperationsTest, pow )
 {
    // FIXME: for integer exponent, the test fails with CUDA
-//   auto pow3 = [](double i) { return TNL::pow(i, 3); };
-   auto pow3 = [](double i) { return TNL::pow(i, 3.0); };
+//   auto pow3 = [](auto i) { return TNL::pow(i, 3); };
+   auto pow3 = [](auto i) { return TNL::pow(i, 3.0); };
    SETUP_UNARY_VECTOR_TEST_FUNCTION( VECTOR_TEST_SIZE, -VECTOR_TEST_SIZE, VECTOR_TEST_SIZE, pow3 );
 
    // vector or view
@@ -585,9 +589,11 @@ TYPED_TEST( VectorUnaryOperationsTest, sign )
    expect_vectors_near( sign(-(-V1)), expected );
 }
 
+// This test is not suitable for vector-of-static-vectors where the RealType cannot be cast to bool.
+#ifndef VECTOR_OF_STATIC_VECTORS
 TYPED_TEST( VectorUnaryOperationsTest, cast )
 {
-   auto identity = [](int i) { return i; };
+   auto identity = [](auto i) { return i; };
    SETUP_UNARY_VECTOR_TEST_FUNCTION( VECTOR_TEST_SIZE, 1, VECTOR_TEST_SIZE, identity );
 
    // vector or vector view
@@ -612,11 +618,8 @@ TYPED_TEST( VectorUnaryOperationsTest, cast )
 //   EXPECT_EQ( expression3, true );
    EXPECT_EQ( cast<bool>(-V1), true );
 }
+#endif
 
 } // namespace unary_tests
 
 #endif // HAVE_GTEST
-
-#if !defined(DISTRIBUTED_VECTOR) && !defined(STATIC_VECTOR)
-#include "../main.h"
-#endif
diff --git a/src/UnitTests/Containers/VectorVerticalOperationsTest.cpp b/src/UnitTests/Containers/VectorVerticalOperationsTest.cpp
index 376d6a0b7..ee386fab1 100644
--- a/src/UnitTests/Containers/VectorVerticalOperationsTest.cpp
+++ b/src/UnitTests/Containers/VectorVerticalOperationsTest.cpp
@@ -1 +1,2 @@
 #include "VectorVerticalOperationsTest.h"
+#include "../main.h"
diff --git a/src/UnitTests/Containers/VectorVerticalOperationsTest.cu b/src/UnitTests/Containers/VectorVerticalOperationsTest.cu
index 376d6a0b7..ee386fab1 100644
--- a/src/UnitTests/Containers/VectorVerticalOperationsTest.cu
+++ b/src/UnitTests/Containers/VectorVerticalOperationsTest.cu
@@ -1 +1,2 @@
 #include "VectorVerticalOperationsTest.h"
+#include "../main.h"
diff --git a/src/UnitTests/Containers/VectorVerticalOperationsTest.h b/src/UnitTests/Containers/VectorVerticalOperationsTest.h
index dae1dc198..3aa60e612 100644
--- a/src/UnitTests/Containers/VectorVerticalOperationsTest.h
+++ b/src/UnitTests/Containers/VectorVerticalOperationsTest.h
@@ -175,6 +175,8 @@ protected:
 
 TYPED_TEST_SUITE( VectorVerticalOperationsTest, VectorTypes );
 
+// FIXME: function does not work for nested vectors - std::numeric_limits does not make sense for vector types
+#ifndef VECTOR_OF_STATIC_VECTORS
 TYPED_TEST( VectorVerticalOperationsTest, max )
 {
    SETUP_VERTICAL_TEST_ALIASES;
@@ -186,7 +188,10 @@ TYPED_TEST( VectorVerticalOperationsTest, max )
    // binary expression
    EXPECT_EQ( max(V1 + 2), size - 1 + 2 );
 }
+#endif
 
+// FIXME: function does not work for nested vectors - the reduction operation expects a scalar type
+#ifndef VECTOR_OF_STATIC_VECTORS
 TYPED_TEST( VectorVerticalOperationsTest, argMax )
 {
    SETUP_VERTICAL_TEST_ALIASES;
@@ -199,7 +204,10 @@ TYPED_TEST( VectorVerticalOperationsTest, argMax )
    // expression
    EXPECT_EQ( argMax(V1 + 2), std::make_pair( (RealType) size - 1 + 2, size - 1 ) );
 }
+#endif
 
+// FIXME: function does not work for nested vectors - std::numeric_limits does not make sense for vector types
+#ifndef VECTOR_OF_STATIC_VECTORS
 TYPED_TEST( VectorVerticalOperationsTest, min )
 {
    SETUP_VERTICAL_TEST_ALIASES;
@@ -211,7 +219,10 @@ TYPED_TEST( VectorVerticalOperationsTest, min )
    // binary expression
    EXPECT_EQ( min(V1 + 2), 2 );
 }
+#endif
 
+// FIXME: function does not work for nested vectors - the reduction operation expects a scalar type
+#ifndef VECTOR_OF_STATIC_VECTORS
 TYPED_TEST( VectorVerticalOperationsTest, argMin )
 {
    SETUP_VERTICAL_TEST_ALIASES;
@@ -224,6 +235,7 @@ TYPED_TEST( VectorVerticalOperationsTest, argMin )
    // binary expression
    EXPECT_EQ( argMin(V1 + 2), std::make_pair( (RealType) 2 , 0 ) );
 }
+#endif
 
 TYPED_TEST( VectorVerticalOperationsTest, sum )
 {
@@ -237,6 +249,8 @@ TYPED_TEST( VectorVerticalOperationsTest, sum )
    EXPECT_EQ( sum(V1 - 1), 0.5 * size * (size - 1) - size );
 }
 
+// FIXME: function does not work for nested vectors - max does not work for nested vectors
+#ifndef VECTOR_OF_STATIC_VECTORS
 TYPED_TEST( VectorVerticalOperationsTest, maxNorm )
 {
    SETUP_VERTICAL_TEST_ALIASES;
@@ -248,6 +262,7 @@ TYPED_TEST( VectorVerticalOperationsTest, maxNorm )
    // binary expression
    EXPECT_EQ( maxNorm(V1 - size), size );
 }
+#endif
 
 TYPED_TEST( VectorVerticalOperationsTest, l1Norm )
 {
@@ -269,9 +284,11 @@ TYPED_TEST( VectorVerticalOperationsTest, l1Norm )
    EXPECT_EQ( l1Norm(2 * V1 - V1), size );
 }
 
+// FIXME: l2Norm does not work for nested vectors - dangling references due to Static*ExpressionTemplate
+//        classes binding to temporary objects which get destroyed before l2Norm returns
+#ifndef VECTOR_OF_STATIC_VECTORS
 TYPED_TEST( VectorVerticalOperationsTest, l2Norm )
 {
-   using RealType = typename TestFixture::VectorOrView::RealType;
 #ifdef STATIC_VECTOR
    setConstantSequence( this->V1, 1 );
    const typename TestFixture::VectorOrView& V1( this->V1 );
@@ -282,7 +299,7 @@ TYPED_TEST( VectorVerticalOperationsTest, l2Norm )
 #endif
    const int size = V1.getSize();
 
-   const auto expected = std::sqrt( (RealType) size );
+   const auto expected = std::sqrt( size );
 
    // vector or vector view
    EXPECT_EQ( l2Norm(V1), expected );
@@ -291,10 +308,12 @@ TYPED_TEST( VectorVerticalOperationsTest, l2Norm )
    // binary expression
    EXPECT_EQ( l2Norm(2 * V1 - V1), expected );
 }
+#endif
 
+// FIXME: function does not work for nested vectors - compilation error
+#ifndef VECTOR_OF_STATIC_VECTORS
 TYPED_TEST( VectorVerticalOperationsTest, lpNorm )
 {
-   using RealType = typename TestFixture::VectorOrView::RealType;
 #ifdef STATIC_VECTOR
    setConstantSequence( this->V1, 1 );
    const typename TestFixture::VectorOrView& V1( this->V1 );
@@ -306,24 +325,25 @@ TYPED_TEST( VectorVerticalOperationsTest, lpNorm )
    const int size = V1.getSize();
 
    const auto expectedL1norm = size;
-   const auto expectedL2norm = std::sqrt( (RealType) size );
-   const auto expectedL3norm = std::cbrt( (RealType) size );
+   const auto expectedL2norm = std::sqrt( size );
+   const auto expectedL3norm = std::cbrt( size );
 
    const auto epsilon = 64 * std::numeric_limits< decltype(expectedL3norm) >::epsilon();
 
    // vector or vector view
    EXPECT_EQ( lpNorm(V1, 1.0), expectedL1norm );
    EXPECT_EQ( lpNorm(V1, 2.0), expectedL2norm );
-   EXPECT_NEAR( lpNorm(V1, 3.0), expectedL3norm, epsilon );
+   expect_near( lpNorm(V1, 3.0), expectedL3norm, epsilon );
    // unary expression
    EXPECT_EQ( lpNorm(-V1, 1.0), expectedL1norm );
    EXPECT_EQ( lpNorm(-V1, 2.0), expectedL2norm );
-   EXPECT_NEAR( lpNorm(-V1, 3.0), expectedL3norm, epsilon );
+   expect_near( lpNorm(-V1, 3.0), expectedL3norm, epsilon );
    // binary expression
    EXPECT_EQ( lpNorm(2 * V1 - V1, 1.0), expectedL1norm );
    EXPECT_EQ( lpNorm(2 * V1 - V1, 2.0), expectedL2norm );
-   EXPECT_NEAR( lpNorm(2 * V1 - V1, 3.0), expectedL3norm, epsilon );
+   expect_near( lpNorm(2 * V1 - V1, 3.0), expectedL3norm, epsilon );
 }
+#endif
 
 TYPED_TEST( VectorVerticalOperationsTest, product )
 {
@@ -353,7 +373,3 @@ TYPED_TEST( VectorVerticalOperationsTest, product )
 } // namespace vertical_tests
 
 #endif // HAVE_GTEST
-
-#if !defined(DISTRIBUTED_VECTOR) && !defined(STATIC_VECTOR)
-#include "../main.h"
-#endif
-- 
GitLab


From 4c5ddb352c9aafc93c4d3fa33f72d7aefaaa72a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Thu, 9 Jul 2020 15:21:27 +0200
Subject: [PATCH 09/15] Fixed vector assignment for nested vectors

---
 src/TNL/Containers/detail/VectorAssignment.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/TNL/Containers/detail/VectorAssignment.h b/src/TNL/Containers/detail/VectorAssignment.h
index fa778a248..5f273d5f0 100644
--- a/src/TNL/Containers/detail/VectorAssignment.h
+++ b/src/TNL/Containers/detail/VectorAssignment.h
@@ -11,6 +11,7 @@
 #pragma once
 
 #include <TNL/TypeTraits.h>
+#include <TNL/Containers/Expressions/TypeTraits.h>
 #include <TNL/Algorithms/ParallelFor.h>
 
 namespace TNL {
@@ -22,7 +23,7 @@ namespace detail {
  */
 template< typename Vector,
           typename T,
-          bool hasSubscriptOperator = HasSubscriptOperator< T >::value >
+          bool vectorVectorAssignment = HasSubscriptOperator< T >::value && ! Expressions::IsArithmeticSubtype< T, Vector >::value >
 struct VectorAssignment;
 
 /**
@@ -30,12 +31,12 @@ struct VectorAssignment;
  */
 template< typename Vector,
           typename T,
-          bool hasSubscriptOperator = HasSubscriptOperator< T >::value,
+          bool vectorVectorAssignment = HasSubscriptOperator< T >::value && ! Expressions::IsArithmeticSubtype< T, Vector >::value,
           bool hasSetSizeMethod = HasSetSizeMethod< T >::value >
 struct VectorAssignmentWithOperation;
 
 /**
- * \brief Specialization of ASSIGNEMENT with subscript operator
+ * \brief Specialization for vector-vector assignment.
  */
 template< typename Vector,
           typename T >
@@ -73,8 +74,7 @@ struct VectorAssignment< Vector, T, true >
 };
 
 /**
- * \brief Specialization of ASSIGNEMENT for array-value assignment for other types. We assume
- * that T is convertible to Vector::ValueType.
+ * \brief Specialization for vector-value assignment. We assume that T is assignable to Vector::RealType.
  */
 template< typename Vector,
           typename T >
-- 
GitLab


From 3467033919343fa64d877a8823c9d4747ad28366 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Thu, 9 Jul 2020 15:25:51 +0200
Subject: [PATCH 10/15] Fixed binary expression templates for nested vector
 types

Fixes #60
---
 src/TNL/Containers/Expressions/TypeTraits.h   | 82 ++++++++++++-----
 .../Containers/VectorBinaryOperationsTest.h   | 87 +++++++++++++++++--
 .../Containers/VectorOfStaticVectorsTest.cpp  |  6 +-
 .../Containers/VectorOfStaticVectorsTest.cu   |  6 +-
 .../Containers/VectorOfStaticVectorsTest.h    |  5 ++
 5 files changed, 148 insertions(+), 38 deletions(-)
 create mode 100644 src/UnitTests/Containers/VectorOfStaticVectorsTest.h

diff --git a/src/TNL/Containers/Expressions/TypeTraits.h b/src/TNL/Containers/Expressions/TypeTraits.h
index 5df97785e..3142ee35e 100644
--- a/src/TNL/Containers/Expressions/TypeTraits.h
+++ b/src/TNL/Containers/Expressions/TypeTraits.h
@@ -85,13 +85,70 @@ using EnableIfDistributedBinaryExpression_t = std::enable_if_t<
       ) >;
 
 
+// helper trait class for recursively turning expression template classes into compatible vectors
+template<class T, class R = void>
+struct enable_if_type { typedef R type; };
+
+template< typename R, typename Enable = void >
+struct RemoveExpressionTemplate
+{
+   using type = std::decay_t< R >;
+};
+
+template< typename R >
+struct RemoveExpressionTemplate< R, typename enable_if_type< typename std::decay_t< R >::VectorOperandType >::type >
+{
+   using type = typename RemoveExpressionTemplate< typename std::decay_t< R >::VectorOperandType >::type;
+};
+
+template< typename R >
+using RemoveET = typename RemoveExpressionTemplate< R >::type;
+
+
+template< typename T1, typename T2 >
+constexpr std::enable_if_t<
+      ! ( std::is_arithmetic< T1 >::value && std::is_arithmetic< T2 >::value ) &&
+      ! ( IsStaticArrayType< T1 >::value && IsStaticArrayType< T2 >::value ) &&
+      ! ( IsArrayType< T1 >::value && IsArrayType< T2 >::value )
+, bool >
+compatibleForVectorAssignment()
+{
+   return false;
+}
+
+template< typename T1, typename T2 >
+constexpr std::enable_if_t< std::is_arithmetic< T1 >::value && std::is_arithmetic< T2 >::value, bool >
+compatibleForVectorAssignment()
+{
+   return true;
+}
+
+template< typename T1, typename T2 >
+constexpr std::enable_if_t< IsStaticArrayType< T1 >::value && IsStaticArrayType< T2 >::value, bool >
+compatibleForVectorAssignment()
+{
+   return T1::getSize() == T2::getSize() &&
+          compatibleForVectorAssignment< typename RemoveET< T1 >::ValueType, typename RemoveET< T2 >::ValueType >();
+}
+
+template< typename T1, typename T2 >
+constexpr std::enable_if_t< IsArrayType< T1 >::value && IsArrayType< T2 >::value, bool >
+compatibleForVectorAssignment()
+{
+   return compatibleForVectorAssignment< typename RemoveET< T1 >::ValueType, typename RemoveET< T2 >::ValueType >();
+}
+
+
 // helper trait class for proper classification of expression operands using getExpressionVariableType
 template< typename T, typename V,
-          bool enabled = IsVectorType< V >::value >
+          bool enabled = HasEnabledExpressionTemplates< V >::value ||
+                         HasEnabledStaticExpressionTemplates< V >::value ||
+                         HasEnabledDistributedExpressionTemplates< V >::value >
 struct IsArithmeticSubtype
 : public std::integral_constant< bool,
-            // TODO: use std::is_assignable?
-            std::is_same< T, typename std::decay_t< V >::RealType >::value >
+            // Note that using std::is_same would not be general enough, because e.g.
+            // StaticVector<3, int> may be assigned to StaticVector<3, double>
+            compatibleForVectorAssignment< typename V::RealType, T >() >
 {};
 
 template< typename T >
@@ -110,25 +167,6 @@ struct IsArithmeticSubtype< T, V, false >
 {};
 
 
-// helper trait class (used in unit tests)
-template<class T, class R = void>
-struct enable_if_type { typedef R type; };
-
-template< typename R, typename Enable = void >
-struct RemoveExpressionTemplate
-{
-   using type = std::decay_t< R >;
-};
-
-template< typename R >
-struct RemoveExpressionTemplate< R, typename enable_if_type< typename std::decay_t< R >::VectorOperandType >::type >
-{
-   using type = typename RemoveExpressionTemplate< typename std::decay_t< R >::VectorOperandType >::type;
-};
-
-template< typename R >
-using RemoveET = typename RemoveExpressionTemplate< R >::type;
-
 // helper trait class for Static*ExpressionTemplates classes
 template< typename R, typename Enable = void >
 struct OperandMemberType
diff --git a/src/UnitTests/Containers/VectorBinaryOperationsTest.h b/src/UnitTests/Containers/VectorBinaryOperationsTest.h
index 818d45e32..27c4c46e9 100644
--- a/src/UnitTests/Containers/VectorBinaryOperationsTest.h
+++ b/src/UnitTests/Containers/VectorBinaryOperationsTest.h
@@ -58,18 +58,18 @@ class VectorBinaryOperationsTest : public ::testing::Test
 protected:
    using Left = typename Pair::Left;
    using Right = typename Pair::Right;
+   using LeftReal = std::remove_const_t< typename Left::RealType >;
+   using RightReal = std::remove_const_t< typename Right::RealType >;
 #ifndef STATIC_VECTOR
-   using LeftNonConstReal = std::remove_const_t< typename Left::RealType >;
-   using RightNonConstReal = std::remove_const_t< typename Right::RealType >;
    #ifdef DISTRIBUTED_VECTOR
       using CommunicatorType = typename Left::CommunicatorType;
       static_assert( std::is_same< typename Right::CommunicatorType, CommunicatorType >::value,
                      "CommunicatorType must be the same for both Left and Right vectors." );
-      using LeftVector = DistributedVector< LeftNonConstReal, typename Left::DeviceType, typename Left::IndexType, CommunicatorType >;
-      using RightVector = DistributedVector< RightNonConstReal, typename Right::DeviceType, typename Right::IndexType, CommunicatorType >;
+      using LeftVector = DistributedVector< LeftReal, typename Left::DeviceType, typename Left::IndexType, CommunicatorType >;
+      using RightVector = DistributedVector< RightReal, typename Right::DeviceType, typename Right::IndexType, CommunicatorType >;
    #else
-      using LeftVector = Vector< LeftNonConstReal, typename Left::DeviceType, typename Left::IndexType >;
-      using RightVector = Vector< RightNonConstReal, typename Right::DeviceType, typename Right::IndexType >;
+      using LeftVector = Vector< LeftReal, typename Left::DeviceType, typename Left::IndexType >;
+      using RightVector = Vector< RightReal, typename Right::DeviceType, typename Right::IndexType >;
    #endif
 #endif
 
@@ -132,6 +132,8 @@ protected:
 #define SETUP_BINARY_TEST_ALIASES \
    using Left = typename TestFixture::Left;                 \
    using Right = typename TestFixture::Right;               \
+   using LeftReal = typename TestFixture::LeftReal;         \
+   using RightReal = typename TestFixture::RightReal;       \
    Left& L1 = this->L1;                                     \
    Left& L2 = this->L2;                                     \
    Right& R1 = this->R1;                                    \
@@ -263,6 +265,8 @@ TYPED_TEST( VectorBinaryOperationsTest, EQ )
    EXPECT_EQ( L1, R1 );       // vector or vector view
    EXPECT_EQ( L1, 1 );        // right scalar
    EXPECT_EQ( 1, R1 );        // left scalar
+   EXPECT_EQ( L1, RightReal(1) );   // right scalar
+   EXPECT_EQ( LeftReal(1), R1 );    // left scalar
    EXPECT_EQ( L2, R1 + R1 );  // right expression
    EXPECT_EQ( L1 + L1, R2 );  // left expression
    EXPECT_EQ( L1 + L1, R1 + R1 );  // two expressions
@@ -282,6 +286,8 @@ TYPED_TEST( VectorBinaryOperationsTest, NE )
    EXPECT_NE( L1, R2 );       // vector or vector view
    EXPECT_NE( L1, 2 );        // right scalar
    EXPECT_NE( 2, R1 );        // left scalar
+   EXPECT_NE( L1, RightReal(2) );   // right scalar
+   EXPECT_NE( LeftReal(2), R1 );    // left scalar
    EXPECT_NE( L1, R1 + R1 );  // right expression
    EXPECT_NE( L1 + L1, R1 );  // left expression
    EXPECT_NE( L1 + L1, R2 + R2 );  // two expressions
@@ -301,6 +307,8 @@ TYPED_TEST( VectorBinaryOperationsTest, LT )
    EXPECT_LT( L1, R2 );       // vector or vector view
    EXPECT_LT( L1, 2 );        // right scalar
    EXPECT_LT( 1, R2 );        // left scalar
+   EXPECT_LT( L1, RightReal(2) );   // right scalar
+   EXPECT_LT( LeftReal(1), R2 );    // left scalar
    EXPECT_LT( L1, R1 + R1 );  // right expression
    EXPECT_LT( L1 - L1, R1 );  // left expression
    EXPECT_LT( L1 - L1, R1 + R1 );  // two expressions
@@ -313,6 +321,8 @@ TYPED_TEST( VectorBinaryOperationsTest, GT )
    EXPECT_GT( L2, R1 );       // vector or vector view
    EXPECT_GT( L2, 1 );        // right scalar
    EXPECT_GT( 2, R1 );        // left scalar
+   EXPECT_GT( L2, RightReal(1) );   // right scalar
+   EXPECT_GT( LeftReal(2), R1 );    // left scalar
    EXPECT_GT( L1, R1 - R1 );  // right expression
    EXPECT_GT( L1 + L1, R1 );  // left expression
    EXPECT_GT( L1 + L1, R1 - R1 );  // two expressions
@@ -326,6 +336,8 @@ TYPED_TEST( VectorBinaryOperationsTest, LE )
    EXPECT_LE( L1, R2 );       // vector or vector view
    EXPECT_LE( L1, 2 );        // right scalar
    EXPECT_LE( 1, R2 );        // left scalar
+   EXPECT_LE( L1, RightReal(2) );   // right scalar
+   EXPECT_LE( LeftReal(1), R2 );    // left scalar
    EXPECT_LE( L1, R1 + R1 );  // right expression
    EXPECT_LE( L1 - L1, R1 );  // left expression
    EXPECT_LE( L1 - L1, R1 + R1 );  // two expressions
@@ -334,6 +346,8 @@ TYPED_TEST( VectorBinaryOperationsTest, LE )
    EXPECT_LE( L1, R1 );       // vector or vector view
    EXPECT_LE( L1, 1 );        // right scalar
    EXPECT_LE( 1, R1 );        // left scalar
+   EXPECT_LE( L1, RightReal(1) );   // right scalar
+   EXPECT_LE( LeftReal(1), R1 );    // left scalar
    EXPECT_LE( L2, R1 + R1 );  // right expression
    EXPECT_LE( L1 + L1, R2 );  // left expression
    EXPECT_LE( L1 + L1, R1 + R2 );  // two expressions
@@ -347,6 +361,8 @@ TYPED_TEST( VectorBinaryOperationsTest, GE )
    EXPECT_GE( L2, R1 );       // vector or vector view
    EXPECT_GE( L2, 1 );        // right scalar
    EXPECT_GE( 2, R1 );        // left scalar
+   EXPECT_GE( L2, RightReal(1) );   // right scalar
+   EXPECT_GE( LeftReal(2), R1 );    // left scalar
    EXPECT_GE( L1, R1 - R1 );  // right expression
    EXPECT_GE( L1 + L1, R1 );  // left expression
    EXPECT_GE( L1 + L1, R1 - R1 );  // two expressions
@@ -355,6 +371,8 @@ TYPED_TEST( VectorBinaryOperationsTest, GE )
    EXPECT_LE( L1, R1 );       // vector or vector view
    EXPECT_LE( L1, 1 );        // right scalar
    EXPECT_LE( 1, R1 );        // left scalar
+   EXPECT_LE( L1, RightReal(1) );   // right scalar
+   EXPECT_LE( LeftReal(1), R1 );    // left scalar
    EXPECT_LE( L2, R1 + R1 );  // right expression
    EXPECT_LE( L1 + L1, R2 );  // left expression
    EXPECT_LE( L1 + L1, R1 + R2 );  // two expressions
@@ -369,6 +387,8 @@ TYPED_TEST( VectorBinaryOperationsTest, addition )
    // with scalar
    EXPECT_EQ( L1 + 1, 2 );
    EXPECT_EQ( 1 + L1, 2 );
+   EXPECT_EQ( L1 + LeftReal(1), 2 );
+   EXPECT_EQ( LeftReal(1) + L1, 2 );
    // with expression
    EXPECT_EQ( L1 + (L1 + L1), 3 );
    EXPECT_EQ( (L1 + L1) + L1, 3 );
@@ -376,6 +396,11 @@ TYPED_TEST( VectorBinaryOperationsTest, addition )
    EXPECT_EQ( (L1 + L1) + R1, 3 );
    // with two expressions
    EXPECT_EQ( (L1 + L1) + (L1 + L1), 4 );
+   // with expression and scalar
+   EXPECT_EQ( (L1 + L1) + 1, 3 );
+   EXPECT_EQ( (L1 + L1) + RightReal(1), 3 );
+   EXPECT_EQ( 1 + (R1 + R1), 3 );
+   EXPECT_EQ( LeftReal(1) + (R1 + R1), 3 );
 }
 
 TYPED_TEST( VectorBinaryOperationsTest, subtraction )
@@ -387,6 +412,8 @@ TYPED_TEST( VectorBinaryOperationsTest, subtraction )
    // with scalar
    EXPECT_EQ( L1 - 1, 0 );
    EXPECT_EQ( 1 - L1, 0 );
+   EXPECT_EQ( L1 - LeftReal(1), 0 );
+   EXPECT_EQ( LeftReal(1) - L1, 0 );
    // with expression
    EXPECT_EQ( L2 - (L1 + L1), 0 );
    EXPECT_EQ( (L1 + L1) - L2, 0 );
@@ -394,6 +421,11 @@ TYPED_TEST( VectorBinaryOperationsTest, subtraction )
    EXPECT_EQ( (L1 + L1) - R2, 0 );
    // with two expressions
    EXPECT_EQ( (L1 + L1) - (L1 + L1), 0 );
+   // with expression and scalar
+   EXPECT_EQ( (L1 + L1) - 1, 1 );
+   EXPECT_EQ( (L1 + L1) - RightReal(1), 1 );
+   EXPECT_EQ( 1 - (R1 + R1), -1 );
+   EXPECT_EQ( LeftReal(1) - (R1 + R1), -1 );
 }
 
 TYPED_TEST( VectorBinaryOperationsTest, multiplication )
@@ -405,6 +437,8 @@ TYPED_TEST( VectorBinaryOperationsTest, multiplication )
    // with scalar
    EXPECT_EQ( L1 * 2, L2 );
    EXPECT_EQ( 2 * L1, L2 );
+   EXPECT_EQ( L1 * LeftReal(2), L2 );
+   EXPECT_EQ( LeftReal(2) * L1, L2 );
    // with expression
    EXPECT_EQ( L1 * (L1 + L1), L2 );
    EXPECT_EQ( (L1 + L1) * L1, L2 );
@@ -412,6 +446,11 @@ TYPED_TEST( VectorBinaryOperationsTest, multiplication )
    EXPECT_EQ( (L1 + L1) * R1, L2 );
    // with two expressions
    EXPECT_EQ( (L1 + L1) * (L1 + L1), 4 );
+   // with expression and scalar
+   EXPECT_EQ( (L1 + L1) * 1, 2 );
+   EXPECT_EQ( (L1 + L1) * RightReal(1), 2 );
+   EXPECT_EQ( 1 * (R1 + R1), 2 );
+   EXPECT_EQ( LeftReal(1) * (R1 + R1), 2 );
 }
 
 TYPED_TEST( VectorBinaryOperationsTest, division )
@@ -423,6 +462,8 @@ TYPED_TEST( VectorBinaryOperationsTest, division )
    // with scalar
    EXPECT_EQ( L2 / 2, L1 );
    EXPECT_EQ( 2 / L2, L1 );
+   EXPECT_EQ( L2 / LeftReal(2), L1 );
+   EXPECT_EQ( LeftReal(2) / L2, L1 );
    // with expression
    EXPECT_EQ( L2 / (L1 + L1), L1 );
    EXPECT_EQ( (L1 + L1) / L2, L1 );
@@ -430,6 +471,11 @@ TYPED_TEST( VectorBinaryOperationsTest, division )
    EXPECT_EQ( (L1 + L1) / R2, L1 );
    // with two expressions
    EXPECT_EQ( (L1 + L1) / (L1 + L1), L1 );
+   // with expression and scalar
+   EXPECT_EQ( (L1 + L1) / 1, 2 );
+   EXPECT_EQ( (L1 + L1) / RightReal(1), 2 );
+   EXPECT_EQ( 2 / (R1 + R1), 1 );
+   EXPECT_EQ( LeftReal(2) / (R1 + R1), 1 );
 }
 
 template< typename Left, typename Right, std::enable_if_t< std::is_const<typename Left::RealType>::value, bool > = true >
@@ -438,12 +484,15 @@ void test_assignment( Left& L1, Left& L2, Right& R1, Right& R2 )
 template< typename Left, typename Right, std::enable_if_t< ! std::is_const<typename Left::RealType>::value, bool > = true >
 void test_assignment( Left& L1, Left& L2, Right& R1, Right& R2 )
 {
+   using RightReal = std::remove_const_t< typename Right::RealType >;
    // with vector or vector view
    L1 = R2;
    EXPECT_EQ( L1, R2 );
    // with scalar
    L1 = 1;
    EXPECT_EQ( L1, 1 );
+   L1 = RightReal(1);
+   EXPECT_EQ( L1, 1 );
    // with expression
    L1 = R1 + R1;
    EXPECT_EQ( L1, R1 + R1 );
@@ -460,6 +509,7 @@ void test_add_assignment( Left& L1, Left& L2, Right& R1, Right& R2 )
 template< typename Left, typename Right, std::enable_if_t< ! std::is_const<typename Left::RealType>::value, bool > = true >
 void test_add_assignment( Left& L1, Left& L2, Right& R1, Right& R2 )
 {
+   using RightReal = std::remove_const_t< typename Right::RealType >;
    // with vector or vector view
    L1 += R2;
    EXPECT_EQ( L1, R1 + R2 );
@@ -467,6 +517,9 @@ void test_add_assignment( Left& L1, Left& L2, Right& R1, Right& R2 )
    L1 = 1;
    L1 += 2;
    EXPECT_EQ( L1, 3 );
+   L1 = 1;
+   L1 += RightReal(2);
+   EXPECT_EQ( L1, 3 );
    // with expression
    L1 = 1;
    L1 += R1 + R1;
@@ -484,6 +537,7 @@ void test_subtract_assignment( Left& L1, Left& L2, Right& R1, Right& R2 )
 template< typename Left, typename Right, std::enable_if_t< ! std::is_const<typename Left::RealType>::value, bool > = true >
 void test_subtract_assignment( Left& L1, Left& L2, Right& R1, Right& R2 )
 {
+   using RightReal = std::remove_const_t< typename Right::RealType >;
    // with vector or vector view
    L1 -= R2;
    EXPECT_EQ( L1, R1 - R2 );
@@ -491,6 +545,9 @@ void test_subtract_assignment( Left& L1, Left& L2, Right& R1, Right& R2 )
    L1 = 1;
    L1 -= 2;
    EXPECT_EQ( L1, -1 );
+   L1 = 1;
+   L1 -= RightReal(2);
+   EXPECT_EQ( L1, -1 );
    // with expression
    L1 = 1;
    L1 -= R1 + R1;
@@ -508,6 +565,7 @@ void test_multiply_assignment( Left& L1, Left& L2, Right& R1, Right& R2 )
 template< typename Left, typename Right, std::enable_if_t< ! std::is_const<typename Left::RealType>::value, bool > = true >
 void test_multiply_assignment( Left& L1, Left& L2, Right& R1, Right& R2 )
 {
+   using RightReal = std::remove_const_t< typename Right::RealType >;
    // with vector or vector view
    L1 *= R2;
    EXPECT_EQ( L1, R2 );
@@ -515,6 +573,9 @@ void test_multiply_assignment( Left& L1, Left& L2, Right& R1, Right& R2 )
    L1 = 1;
    L1 *= 2;
    EXPECT_EQ( L1, 2 );
+   L1 = 1;
+   L1 *= RightReal(2);
+   EXPECT_EQ( L1, 2 );
    // with expression
    L1 = 1;
    L1 *= R1 + R1;
@@ -532,6 +593,7 @@ void test_divide_assignment( Left& L1, Left& L2, Right& R1, Right& R2 )
 template< typename Left, typename Right, std::enable_if_t< ! std::is_const<typename Left::RealType>::value, bool > = true >
 void test_divide_assignment( Left& L1, Left& L2, Right& R1, Right& R2 )
 {
+   using RightReal = std::remove_const_t< typename Right::RealType >;
    // with vector or vector view
    L2 /= R2;
    EXPECT_EQ( L1, R1 );
@@ -539,6 +601,9 @@ void test_divide_assignment( Left& L1, Left& L2, Right& R1, Right& R2 )
    L2 = 2;
    L2 /= 2;
    EXPECT_EQ( L1, 1 );
+   L1 = 2;
+   L1 /= RightReal(2);
+   EXPECT_EQ( L1, 1 );
    // with expression
    L2 = 2;
    L2 /= R1 + R1;
@@ -602,6 +667,11 @@ TYPED_TEST( VectorBinaryOperationsTest, min )
    EXPECT_EQ( TNL::min(L1 + L1, R1), R1 );
    // with two expressions
    EXPECT_EQ( TNL::min(L1 + L1, R1 + R2), L2 );
+   // with expression and scalar
+   EXPECT_EQ( TNL::min(L1 + L1, 1), L1 );
+   EXPECT_EQ( TNL::min(L1 + L1, RightReal(1)), L1 );
+   EXPECT_EQ( TNL::min(1, R1 + R1), L1 );
+   EXPECT_EQ( TNL::min(LeftReal(1), R1 + R1), L1 );
 }
 
 TYPED_TEST( VectorBinaryOperationsTest, max )
@@ -620,6 +690,11 @@ TYPED_TEST( VectorBinaryOperationsTest, max )
    EXPECT_EQ( TNL::max(L1 + L1, R1), R2 );
    // with two expressions
    EXPECT_EQ( TNL::max(L1 - L1, R1 + R1), L2 );
+   // with expression and scalar
+   EXPECT_EQ( TNL::max(L1 + L1, 1), L2 );
+   EXPECT_EQ( TNL::max(L1 + L1, RightReal(1)), L2 );
+   EXPECT_EQ( TNL::max(1, R1 + R1), L2 );
+   EXPECT_EQ( TNL::max(LeftReal(1), R1 + R1), L2 );
 }
 
 #if defined(HAVE_CUDA) && !defined(STATIC_VECTOR)
diff --git a/src/UnitTests/Containers/VectorOfStaticVectorsTest.cpp b/src/UnitTests/Containers/VectorOfStaticVectorsTest.cpp
index 859cf045c..1f5781818 100644
--- a/src/UnitTests/Containers/VectorOfStaticVectorsTest.cpp
+++ b/src/UnitTests/Containers/VectorOfStaticVectorsTest.cpp
@@ -1,5 +1 @@
-#define VECTOR_OF_STATIC_VECTORS
-#include "VectorBinaryOperationsTest.h"
-#include "VectorUnaryOperationsTest.h"
-#include "VectorVerticalOperationsTest.h"
-#include "../main.h"
+#include "VectorOfStaticVectorsTest.h"
diff --git a/src/UnitTests/Containers/VectorOfStaticVectorsTest.cu b/src/UnitTests/Containers/VectorOfStaticVectorsTest.cu
index 859cf045c..1f5781818 100644
--- a/src/UnitTests/Containers/VectorOfStaticVectorsTest.cu
+++ b/src/UnitTests/Containers/VectorOfStaticVectorsTest.cu
@@ -1,5 +1 @@
-#define VECTOR_OF_STATIC_VECTORS
-#include "VectorBinaryOperationsTest.h"
-#include "VectorUnaryOperationsTest.h"
-#include "VectorVerticalOperationsTest.h"
-#include "../main.h"
+#include "VectorOfStaticVectorsTest.h"
diff --git a/src/UnitTests/Containers/VectorOfStaticVectorsTest.h b/src/UnitTests/Containers/VectorOfStaticVectorsTest.h
new file mode 100644
index 000000000..859cf045c
--- /dev/null
+++ b/src/UnitTests/Containers/VectorOfStaticVectorsTest.h
@@ -0,0 +1,5 @@
+#define VECTOR_OF_STATIC_VECTORS
+#include "VectorBinaryOperationsTest.h"
+#include "VectorUnaryOperationsTest.h"
+#include "VectorVerticalOperationsTest.h"
+#include "../main.h"
-- 
GitLab


From ac818bfa153661726a147ba897a2795da3e4861b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Thu, 9 Jul 2020 22:10:31 +0200
Subject: [PATCH 11/15] Added static assert to the function
 Cuda::getSharedMemory

---
 src/TNL/Cuda/SharedMemory.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/TNL/Cuda/SharedMemory.h b/src/TNL/Cuda/SharedMemory.h
index 29851952c..915bbbf9b 100644
--- a/src/TNL/Cuda/SharedMemory.h
+++ b/src/TNL/Cuda/SharedMemory.h
@@ -116,6 +116,8 @@ struct SharedMemory< T, 64 >
 template< typename T >
 __device__ inline T* getSharedMemory()
 {
+   static_assert( sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8,
+                  "Requested type has unsupported size." );
    return SharedMemory< T >{};
 }
 
-- 
GitLab


From d13fa1f1764c5591bde9b493395111579260be62 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Fri, 10 Jul 2020 00:07:41 +0200
Subject: [PATCH 12/15] Changed CudaReductionKernel to use static instead of
 dynamic shared memory

Fixes #78
---
 src/TNL/Algorithms/CudaReductionKernel.h | 73 ++++++++++++------------
 1 file changed, 35 insertions(+), 38 deletions(-)

diff --git a/src/TNL/Algorithms/CudaReductionKernel.h b/src/TNL/Algorithms/CudaReductionKernel.h
index e495e4375..5e7af9731 100644
--- a/src/TNL/Algorithms/CudaReductionKernel.h
+++ b/src/TNL/Algorithms/CudaReductionKernel.h
@@ -15,7 +15,6 @@
 #include <TNL/Assert.h>
 #include <TNL/Math.h>
 #include <TNL/Cuda/DeviceInfo.h>
-#include <TNL/Cuda/SharedMemory.h>
 #include <TNL/Algorithms/CudaReductionBuffer.h>
 #include <TNL/Algorithms/MultiDeviceMemoryOperations.h>
 #include <TNL/Exceptions/CudaSupportMissing.h>
@@ -54,11 +53,15 @@ CudaReductionKernel( const Result zero,
                      const Index end,
                      Result* output )
 {
-   Result* sdata = Cuda::getSharedMemory< Result >();
+   TNL_ASSERT_EQ( blockDim.x, blockSize, "unexpected block size in CudaReductionKernel" );
+   // when there is only one warp per block (blockSize <= 32), we need to allocate two warps
+   // worth of shared memory so that we don't index shared memory out of bounds
+   constexpr int shmemElements = (blockSize <= 32) ? 2 * blockSize : blockSize;
+   __shared__ Result sdata[shmemElements];
 
    // Get the thread id (tid), global thread id (gid) and gridSize.
    const Index tid = threadIdx.x;
-         Index gid = begin + blockIdx.x * blockDim. x + threadIdx.x;
+         Index gid = begin + blockIdx.x * blockDim.x + threadIdx.x;
    const Index gridSize = blockDim.x * gridDim.x;
 
    sdata[ tid ] = zero;
@@ -150,12 +153,16 @@ CudaReductionWithArgumentKernel( const Result zero,
                                  Index* idxOutput,
                                  const Index* idxInput = nullptr )
 {
-   Result* sdata = Cuda::getSharedMemory< Result >();
-   Index* sidx = reinterpret_cast< Index* >( &sdata[ blockDim.x ] );
+   TNL_ASSERT_EQ( blockDim.x, blockSize, "unexpected block size in CudaReductionWithArgumentKernel" );
+   // when there is only one warp per block (blockSize <= 32), we need to allocate two warps
+   // worth of shared memory so that we don't index shared memory out of bounds
+   constexpr int shmemElements = (blockSize <= 32) ? 2 * blockSize : blockSize;
+   __shared__ Result sdata[shmemElements];
+   __shared__ Index sidx[shmemElements];
 
    // Get the thread id (tid), global thread id (gid) and gridSize.
    const Index tid = threadIdx.x;
-         Index gid = begin + blockIdx.x * blockDim. x + threadIdx.x;
+         Index gid = begin + blockIdx.x * blockDim.x + threadIdx.x;
    const Index gridSize = blockDim.x * gridDim.x;
 
    // Start with the sequential reduction and push the result into the shared memory.
@@ -409,12 +416,6 @@ struct CudaReductionKernelLauncher
          blockSize.x = Reduction_maxThreadsPerBlock;
          gridSize.x = TNL::min( Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize );
 
-         // when there is only one warp per blockSize.x, we need to allocate two warps
-         // worth of shared memory so that we don't index shared memory out of bounds
-         const Index shmem = (blockSize.x <= 32)
-                  ? 2 * blockSize.x * sizeof( Result )
-                  : blockSize.x * sizeof( Result );
-
          // This is "general", but this method always sets blockSize.x to a specific value,
          // so runtime switch is not necessary - it only prolongs the compilation time.
 /*
@@ -423,55 +424,55 @@ struct CudaReductionKernelLauncher
          {
             case 512:
                CudaReductionKernel< 512 >
-               <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output);
+               <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output);
                break;
             case 256:
                cudaFuncSetCacheConfig(CudaReductionKernel< 256, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared);
 
                CudaReductionKernel< 256 >
-               <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output);
+               <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output);
                break;
             case 128:
                cudaFuncSetCacheConfig(CudaReductionKernel< 128, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared);
 
                CudaReductionKernel< 128 >
-               <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output);
+               <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output);
                break;
             case  64:
                cudaFuncSetCacheConfig(CudaReductionKernel<  64, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared);
 
                CudaReductionKernel<  64 >
-               <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output);
+               <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output);
                break;
             case  32:
                cudaFuncSetCacheConfig(CudaReductionKernel<  32, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared);
 
                CudaReductionKernel<  32 >
-               <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output);
+               <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output);
                break;
             case  16:
                cudaFuncSetCacheConfig(CudaReductionKernel<  16, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared);
 
                CudaReductionKernel<  16 >
-               <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output);
+               <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output);
                break;
            case   8:
                cudaFuncSetCacheConfig(CudaReductionKernel<   8, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared);
 
                CudaReductionKernel<   8 >
-               <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output);
+               <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output);
                break;
             case   4:
                cudaFuncSetCacheConfig(CudaReductionKernel<   4, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared);
 
                CudaReductionKernel<   4 >
-               <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output);
+               <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output);
                break;
             case   2:
                cudaFuncSetCacheConfig(CudaReductionKernel<   2, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared);
 
                CudaReductionKernel<   2 >
-               <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output);
+               <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output);
                break;
             case   1:
                TNL_ASSERT( false, std::cerr << "blockSize should not be 1." << std::endl );
@@ -486,8 +487,9 @@ struct CudaReductionKernelLauncher
          if( blockSize.x == Reduction_maxThreadsPerBlock ) {
             cudaFuncSetCacheConfig(CudaReductionKernel< Reduction_maxThreadsPerBlock, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared);
 
+            // shared memory is allocated statically inside the kernel
             CudaReductionKernel< Reduction_maxThreadsPerBlock >
-            <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, begin, end, output);
+            <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, begin, end, output);
             cudaStreamSynchronize(0);
             TNL_CHECK_CUDA_DEVICE;
          }
@@ -519,12 +521,6 @@ struct CudaReductionKernelLauncher
          blockSize.x = Reduction_maxThreadsPerBlock;
          gridSize.x = TNL::min( Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize );
 
-         // when there is only one warp per blockSize.x, we need to allocate two warps
-         // worth of shared memory so that we don't index shared memory out of bounds
-         const Index shmem = (blockSize.x <= 32)
-                  ? 2 * blockSize.x * ( sizeof( Result ) + sizeof( Index ) )
-                  : blockSize.x * ( sizeof( Result ) + sizeof( Index ) );
-
          // This is "general", but this method always sets blockSize.x to a specific value,
          // so runtime switch is not necessary - it only prolongs the compilation time.
 /*
@@ -533,55 +529,55 @@ struct CudaReductionKernelLauncher
          {
             case 512:
                CudaReductionWithArgumentKernel< 512 >
-               <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
+               <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
                break;
             case 256:
                cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< 256, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared);
 
                CudaReductionWithArgumentKernel< 256 >
-               <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
+               <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
                break;
             case 128:
                cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< 128, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared);
 
                CudaReductionWithArgumentKernel< 128 >
-               <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
+               <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
                break;
             case  64:
                cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel<  64, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared);
 
                CudaReductionWithArgumentKernel<  64 >
-               <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
+               <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
                break;
             case  32:
                cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel<  32, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared);
 
                CudaReductionWithArgumentKernel<  32 >
-               <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
+               <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
                break;
             case  16:
                cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel<  16, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared);
 
                CudaReductionWithArgumentKernel<  16 >
-               <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
+               <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
                break;
            case   8:
                cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel<   8, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared);
 
                CudaReductionWithArgumentKernel<   8 >
-               <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
+               <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
                break;
             case   4:
                cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel<   4, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared);
 
                CudaReductionWithArgumentKernel<   4 >
-               <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
+               <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
                break;
             case   2:
                cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel<   2, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared);
 
                CudaReductionWithArgumentKernel<   2 >
-               <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
+               <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput );
                break;
             case   1:
                TNL_ASSERT( false, std::cerr << "blockSize should not be 1." << std::endl );
@@ -596,8 +592,9 @@ struct CudaReductionKernelLauncher
          if( blockSize.x == Reduction_maxThreadsPerBlock ) {
             cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< Reduction_maxThreadsPerBlock, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared);
 
+            // shared memory is allocated statically inside the kernel
             CudaReductionWithArgumentKernel< Reduction_maxThreadsPerBlock >
-            <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, begin, end, output, idxOutput, idxInput );
+            <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, begin, end, output, idxOutput, idxInput );
             cudaStreamSynchronize(0);
             TNL_CHECK_CUDA_DEVICE;
          }
-- 
GitLab


From e741aa95e781838e2bc659ad113cc033a2afa5f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Fri, 10 Jul 2020 13:13:34 +0200
Subject: [PATCH 13/15] Added CudaReductionFunctorWrapper to avoid nvcc's
 fucked up error

---
 src/TNL/Algorithms/CudaReductionKernel.h | 53 ++++++++++++++++--------
 1 file changed, 36 insertions(+), 17 deletions(-)

diff --git a/src/TNL/Algorithms/CudaReductionKernel.h b/src/TNL/Algorithms/CudaReductionKernel.h
index 5e7af9731..4519678bd 100644
--- a/src/TNL/Algorithms/CudaReductionKernel.h
+++ b/src/TNL/Algorithms/CudaReductionKernel.h
@@ -39,6 +39,25 @@ static constexpr int Reduction_registersPerThread = 32;   // empirically determi
    static constexpr int Reduction_minBlocksPerMultiprocessor = 8;
 #endif
 
+/*
+ * Workaround for an nvcc bug (as of 10.2): in some cases nvcc does not recognize
+ * std::plus<void>::operator() as constexpr and hence __host__ __device__ (for
+ * example, when the arguments are StaticVector<3, double> etc). This wrapper
+ * perfectly forwards the reduction functor and its two arguments and returns the
+ * result of the call; calling the functor through it triggers only a warning,
+ * whereas calling it directly from a __global__ or __device__ function is an error.
+ */
+template< typename Reduction, typename Arg1, typename Arg2 >
+__host__ __device__
+auto CudaReductionFunctorWrapper( Reduction&& reduction, Arg1&& arg1, Arg2&& arg2 )
+{
+// suppress the aforementioned nvcc warning (diagnostic 2979) around the forwarded call
+#pragma push
+#pragma diag_suppress 2979
+   return std::forward<Reduction>(reduction)( std::forward<Arg1>(arg1), std::forward<Arg2>(arg2) );
+#pragma pop
+}
+
 template< int blockSize,
           typename Result,
           typename DataFetcher,
@@ -68,19 +87,19 @@ CudaReductionKernel( const Result zero,
 
    // Start with the sequential reduction and push the result into the shared memory.
    while( gid + 4 * gridSize < end ) {
-      sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid ) );
-      sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid + gridSize ) );
-      sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid + 2 * gridSize ) );
-      sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid + 3 * gridSize ) );
+      sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], dataFetcher( gid ) );
+      sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], dataFetcher( gid + gridSize ) );
+      sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], dataFetcher( gid + 2 * gridSize ) );
+      sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], dataFetcher( gid + 3 * gridSize ) );
       gid += 4 * gridSize;
    }
    while( gid + 2 * gridSize < end ) {
-      sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid ) );
-      sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid + gridSize ) );
+      sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], dataFetcher( gid ) );
+      sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], dataFetcher( gid + gridSize ) );
       gid += 2 * gridSize;
    }
    while( gid < end ) {
-      sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid ) );
+      sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], dataFetcher( gid ) );
       gid += gridSize;
    }
    __syncthreads();
@@ -88,48 +107,48 @@ CudaReductionKernel( const Result zero,
    // Perform the parallel reduction.
    if( blockSize >= 1024 ) {
       if( tid < 512 )
-         sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 512 ] );
+         sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 512 ] );
       __syncthreads();
    }
    if( blockSize >= 512 ) {
       if( tid < 256 )
-         sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 256 ] );
+         sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 256 ] );
       __syncthreads();
    }
    if( blockSize >= 256 ) {
       if( tid < 128 )
-         sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 128 ] );
+         sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 128 ] );
       __syncthreads();
    }
    if( blockSize >= 128 ) {
       if( tid <  64 )
-         sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 64 ] );
+         sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 64 ] );
       __syncthreads();
    }
 
    // This runs in one warp so we use __syncwarp() instead of __syncthreads().
    if( tid < 32 ) {
       if( blockSize >= 64 )
-         sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 32 ] );
+         sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 32 ] );
       __syncwarp();
       // Note that here we do not have to check if tid < 16 etc, because we have
       // 2 * blockSize.x elements of shared memory per block, so we do not
       // access out of bounds. The results for the upper half will be undefined,
       // but unused anyway.
       if( blockSize >= 32 )
-         sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 16 ] );
+         sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 16 ] );
       __syncwarp();
       if( blockSize >= 16 )
-         sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 8 ] );
+         sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 8 ] );
       __syncwarp();
       if( blockSize >=  8 )
-         sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 4 ] );
+         sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 4 ] );
       __syncwarp();
       if( blockSize >=  4 )
-         sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 2 ] );
+         sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 2 ] );
       __syncwarp();
       if( blockSize >=  2 )
-         sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 1 ] );
+         sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 1 ] );
    }
 
    // Store the result back in the global memory.
-- 
GitLab


From 8606d15edb7f8599faba0c20e78705c2509c2583 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Fri, 10 Jul 2020 14:08:37 +0200
Subject: [PATCH 14/15] Fixed VectorAssignmentWithOperation and changed types
 for VectorBinaryOperationsTest to cover operands with different types

---
 src/TNL/Containers/detail/VectorAssignment.h  |   8 +-
 .../Containers/VectorBinaryOperationsTest.h   | 102 +++++++++---------
 2 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/src/TNL/Containers/detail/VectorAssignment.h b/src/TNL/Containers/detail/VectorAssignment.h
index 5f273d5f0..5a36d971c 100644
--- a/src/TNL/Containers/detail/VectorAssignment.h
+++ b/src/TNL/Containers/detail/VectorAssignment.h
@@ -120,22 +120,22 @@ struct VectorAssignmentWithOperation< Vector, T, true, true >
 {
    static void addition( Vector& v, const T& t )
    {
-      VectorAssignmentWithOperation< Vector, typename Vector::ConstViewType >::addition( v, t.getConstView() );
+      VectorAssignmentWithOperation< Vector, typename T::ConstViewType >::addition( v, t.getConstView() );
    }
 
    static void subtraction( Vector& v, const T& t )
    {
-      VectorAssignmentWithOperation< Vector, typename Vector::ConstViewType >::subtraction( v, t.getConstView() );
+      VectorAssignmentWithOperation< Vector, typename T::ConstViewType >::subtraction( v, t.getConstView() );
    }
 
    static void multiplication( Vector& v, const T& t )
    {
-      VectorAssignmentWithOperation< Vector, typename Vector::ConstViewType >::multiplication( v, t.getConstView() );
+      VectorAssignmentWithOperation< Vector, typename T::ConstViewType >::multiplication( v, t.getConstView() );
    }
 
    static void division( Vector& v, const T& t )
    {
-      VectorAssignmentWithOperation< Vector, typename Vector::ConstViewType >::subtraction( v, t.getConstView() );
+      VectorAssignmentWithOperation< Vector, typename T::ConstViewType >::division( v, t.getConstView() );
    }
 };
 
diff --git a/src/UnitTests/Containers/VectorBinaryOperationsTest.h b/src/UnitTests/Containers/VectorBinaryOperationsTest.h
index 27c4c46e9..e438d649f 100644
--- a/src/UnitTests/Containers/VectorBinaryOperationsTest.h
+++ b/src/UnitTests/Containers/VectorBinaryOperationsTest.h
@@ -147,62 +147,62 @@ protected:
 #if defined(DISTRIBUTED_VECTOR)
    using VectorPairs = ::testing::Types<
    #ifndef HAVE_CUDA
-      Pair< DistributedVector<     double, Devices::Host, int, Communicators::MpiCommunicator >,
-            DistributedVector<     double, Devices::Host, int, Communicators::MpiCommunicator > >,
-      Pair< DistributedVector<     double, Devices::Host, int, Communicators::MpiCommunicator >,
-            DistributedVectorView< double, Devices::Host, int, Communicators::MpiCommunicator > >,
-      Pair< DistributedVectorView< double, Devices::Host, int, Communicators::MpiCommunicator >,
-            DistributedVector<     double, Devices::Host, int, Communicators::MpiCommunicator > >,
-      Pair< DistributedVectorView< double, Devices::Host, int, Communicators::MpiCommunicator >,
-            DistributedVectorView< double, Devices::Host, int, Communicators::MpiCommunicator > >,
-
-      Pair< DistributedVector<     double, Devices::Host, int, Communicators::NoDistrCommunicator >,
-            DistributedVector<     double, Devices::Host, int, Communicators::NoDistrCommunicator > >,
-      Pair< DistributedVector<     double, Devices::Host, int, Communicators::NoDistrCommunicator >,
-            DistributedVectorView< double, Devices::Host, int, Communicators::NoDistrCommunicator > >,
-      Pair< DistributedVectorView< double, Devices::Host, int, Communicators::NoDistrCommunicator >,
-            DistributedVector<     double, Devices::Host, int, Communicators::NoDistrCommunicator > >,
-      Pair< DistributedVectorView< double, Devices::Host, int, Communicators::NoDistrCommunicator >,
-            DistributedVectorView< double, Devices::Host, int, Communicators::NoDistrCommunicator > >
+      Pair< DistributedVector<     int,   Devices::Host, int, Communicators::MpiCommunicator >,
+            DistributedVector<     short, Devices::Host, int, Communicators::MpiCommunicator > >,
+      Pair< DistributedVector<     int,   Devices::Host, int, Communicators::MpiCommunicator >,
+            DistributedVectorView< short, Devices::Host, int, Communicators::MpiCommunicator > >,
+      Pair< DistributedVectorView< int,   Devices::Host, int, Communicators::MpiCommunicator >,
+            DistributedVector<     short, Devices::Host, int, Communicators::MpiCommunicator > >,
+      Pair< DistributedVectorView< int,   Devices::Host, int, Communicators::MpiCommunicator >,
+            DistributedVectorView< short, Devices::Host, int, Communicators::MpiCommunicator > >,
+
+      Pair< DistributedVector<     int,   Devices::Host, int, Communicators::NoDistrCommunicator >,
+            DistributedVector<     short, Devices::Host, int, Communicators::NoDistrCommunicator > >,
+      Pair< DistributedVector<     int,   Devices::Host, int, Communicators::NoDistrCommunicator >,
+            DistributedVectorView< short, Devices::Host, int, Communicators::NoDistrCommunicator > >,
+      Pair< DistributedVectorView< int,   Devices::Host, int, Communicators::NoDistrCommunicator >,
+            DistributedVector<     short, Devices::Host, int, Communicators::NoDistrCommunicator > >,
+      Pair< DistributedVectorView< int,   Devices::Host, int, Communicators::NoDistrCommunicator >,
+            DistributedVectorView< short, Devices::Host, int, Communicators::NoDistrCommunicator > >
    #else
-      Pair< DistributedVector<     double, Devices::Cuda, int, Communicators::MpiCommunicator >,
-            DistributedVector<     double, Devices::Cuda, int, Communicators::MpiCommunicator > >,
-      Pair< DistributedVector<     double, Devices::Cuda, int, Communicators::MpiCommunicator >,
-            DistributedVectorView< double, Devices::Cuda, int, Communicators::MpiCommunicator > >,
-      Pair< DistributedVectorView< double, Devices::Cuda, int, Communicators::MpiCommunicator >,
-            DistributedVector<     double, Devices::Cuda, int, Communicators::MpiCommunicator > >,
-      Pair< DistributedVectorView< double, Devices::Cuda, int, Communicators::MpiCommunicator >,
-            DistributedVectorView< double, Devices::Cuda, int, Communicators::MpiCommunicator > >,
-      Pair< DistributedVector<     double, Devices::Cuda, int, Communicators::NoDistrCommunicator >,
-            DistributedVector<     double, Devices::Cuda, int, Communicators::NoDistrCommunicator > >,
-      Pair< DistributedVector<     double, Devices::Cuda, int, Communicators::NoDistrCommunicator >,
-            DistributedVectorView< double, Devices::Cuda, int, Communicators::NoDistrCommunicator > >,
-      Pair< DistributedVectorView< double, Devices::Cuda, int, Communicators::NoDistrCommunicator >,
-            DistributedVector<     double, Devices::Cuda, int, Communicators::NoDistrCommunicator > >,
-      Pair< DistributedVectorView< double, Devices::Cuda, int, Communicators::NoDistrCommunicator >,
-            DistributedVectorView< double, Devices::Cuda, int, Communicators::NoDistrCommunicator > >
+      Pair< DistributedVector<     int,   Devices::Cuda, int, Communicators::MpiCommunicator >,
+            DistributedVector<     short, Devices::Cuda, int, Communicators::MpiCommunicator > >,
+      Pair< DistributedVector<     int,   Devices::Cuda, int, Communicators::MpiCommunicator >,
+            DistributedVectorView< short, Devices::Cuda, int, Communicators::MpiCommunicator > >,
+      Pair< DistributedVectorView< int,   Devices::Cuda, int, Communicators::MpiCommunicator >,
+            DistributedVector<     short, Devices::Cuda, int, Communicators::MpiCommunicator > >,
+      Pair< DistributedVectorView< int,   Devices::Cuda, int, Communicators::MpiCommunicator >,
+            DistributedVectorView< short, Devices::Cuda, int, Communicators::MpiCommunicator > >,
+      Pair< DistributedVector<     int,   Devices::Cuda, int, Communicators::NoDistrCommunicator >,
+            DistributedVector<     short, Devices::Cuda, int, Communicators::NoDistrCommunicator > >,
+      Pair< DistributedVector<     int,   Devices::Cuda, int, Communicators::NoDistrCommunicator >,
+            DistributedVectorView< short, Devices::Cuda, int, Communicators::NoDistrCommunicator > >,
+      Pair< DistributedVectorView< int,   Devices::Cuda, int, Communicators::NoDistrCommunicator >,
+            DistributedVector<     short, Devices::Cuda, int, Communicators::NoDistrCommunicator > >,
+      Pair< DistributedVectorView< int,   Devices::Cuda, int, Communicators::NoDistrCommunicator >,
+            DistributedVectorView< short, Devices::Cuda, int, Communicators::NoDistrCommunicator > >
    #endif
    >;
 #elif defined(STATIC_VECTOR)
    #ifdef VECTOR_OF_STATIC_VECTORS
       using VectorPairs = ::testing::Types<
-         Pair< StaticVector< 1, StaticVector< 3, double > >,  StaticVector< 1, StaticVector< 3, double > > >,
-         Pair< StaticVector< 2, StaticVector< 3, double > >,  StaticVector< 2, StaticVector< 3, double > > >,
-         Pair< StaticVector< 3, StaticVector< 3, double > >,  StaticVector< 3, StaticVector< 3, double > > >,
-         Pair< StaticVector< 4, StaticVector< 3, double > >,  StaticVector< 4, StaticVector< 3, double > > >,
-         Pair< StaticVector< 5, StaticVector< 3, double > >,  StaticVector< 5, StaticVector< 3, double > > >
+         Pair< StaticVector< 1, StaticVector< 3, int > >,  StaticVector< 1, StaticVector< 3, short > > >,
+         Pair< StaticVector< 2, StaticVector< 3, int > >,  StaticVector< 2, StaticVector< 3, short > > >,
+         Pair< StaticVector< 3, StaticVector< 3, int > >,  StaticVector< 3, StaticVector< 3, short > > >,
+         Pair< StaticVector< 4, StaticVector< 3, int > >,  StaticVector< 4, StaticVector< 3, short > > >,
+         Pair< StaticVector< 5, StaticVector< 3, int > >,  StaticVector< 5, StaticVector< 3, short > > >
       >;
    #else
       using VectorPairs = ::testing::Types<
-         Pair< StaticVector< 1, int >,     StaticVector< 1, int >    >,
+         Pair< StaticVector< 1, int >,     StaticVector< 1, short >    >,
          Pair< StaticVector< 1, double >,  StaticVector< 1, double > >,
-         Pair< StaticVector< 2, int >,     StaticVector< 2, int >    >,
+         Pair< StaticVector< 2, int >,     StaticVector< 2, short >    >,
          Pair< StaticVector< 2, double >,  StaticVector< 2, double > >,
-         Pair< StaticVector< 3, int >,     StaticVector< 3, int >    >,
+         Pair< StaticVector< 3, int >,     StaticVector< 3, short >    >,
          Pair< StaticVector< 3, double >,  StaticVector< 3, double > >,
-         Pair< StaticVector< 4, int >,     StaticVector< 4, int >    >,
+         Pair< StaticVector< 4, int >,     StaticVector< 4, short >    >,
          Pair< StaticVector< 4, double >,  StaticVector< 4, double > >,
-         Pair< StaticVector< 5, int >,     StaticVector< 5, int >    >,
+         Pair< StaticVector< 5, int >,     StaticVector< 5, short >    >,
          Pair< StaticVector< 5, double >,  StaticVector< 5, double > >
       >;
    #endif
@@ -210,15 +210,15 @@ protected:
    #ifdef VECTOR_OF_STATIC_VECTORS
       using VectorPairs = ::testing::Types<
       #ifndef HAVE_CUDA
-         Pair< Vector<     StaticVector< 3, double >, Devices::Host >, Vector<     StaticVector< 3, double >, Devices::Host > >,
-         Pair< VectorView< StaticVector< 3, double >, Devices::Host >, Vector<     StaticVector< 3, double >, Devices::Host > >,
-         Pair< Vector<     StaticVector< 3, double >, Devices::Host >, VectorView< StaticVector< 3, double >, Devices::Host > >,
-         Pair< VectorView< StaticVector< 3, double >, Devices::Host >, VectorView< StaticVector< 3, double >, Devices::Host > >
+         Pair< Vector<     StaticVector< 3, int >, Devices::Host >, Vector<     StaticVector< 3, short >, Devices::Host > >,
+         Pair< VectorView< StaticVector< 3, int >, Devices::Host >, Vector<     StaticVector< 3, short >, Devices::Host > >,
+         Pair< Vector<     StaticVector< 3, int >, Devices::Host >, VectorView< StaticVector< 3, short >, Devices::Host > >,
+         Pair< VectorView< StaticVector< 3, int >, Devices::Host >, VectorView< StaticVector< 3, short >, Devices::Host > >
       #else
-         Pair< Vector<     StaticVector< 3, double >, Devices::Cuda >, Vector<     StaticVector< 3, double >, Devices::Cuda > >,
-         Pair< VectorView< StaticVector< 3, double >, Devices::Cuda >, Vector<     StaticVector< 3, double >, Devices::Cuda > >,
-         Pair< Vector<     StaticVector< 3, double >, Devices::Cuda >, VectorView< StaticVector< 3, double >, Devices::Cuda > >,
-         Pair< VectorView< StaticVector< 3, double >, Devices::Cuda >, VectorView< StaticVector< 3, double >, Devices::Cuda > >
+         Pair< Vector<     StaticVector< 3, int >, Devices::Cuda >, Vector<     StaticVector< 3, short >, Devices::Cuda > >,
+         Pair< VectorView< StaticVector< 3, int >, Devices::Cuda >, Vector<     StaticVector< 3, short >, Devices::Cuda > >,
+         Pair< Vector<     StaticVector< 3, int >, Devices::Cuda >, VectorView< StaticVector< 3, short >, Devices::Cuda > >,
+         Pair< VectorView< StaticVector< 3, int >, Devices::Cuda >, VectorView< StaticVector< 3, short >, Devices::Cuda > >
       #endif
       >;
    #else
-- 
GitLab


From bacbfb842066fa943e6efed85915571473ae5a98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Fri, 10 Jul 2020 14:21:50 +0200
Subject: [PATCH 15/15] Removed unnecessary workaround for pybind11's
 format_descriptor

---
 src/Python/pytnl/tnl/Array.h | 23 +++++------------------
 1 file changed, 5 insertions(+), 18 deletions(-)

diff --git a/src/Python/pytnl/tnl/Array.h b/src/Python/pytnl/tnl/Array.h
index b39fe6b0e..5d404f53b 100644
--- a/src/Python/pytnl/tnl/Array.h
+++ b/src/Python/pytnl/tnl/Array.h
@@ -4,25 +4,14 @@
 #include <pybind11/operators.h>
 namespace py = pybind11;
 
+// including pybind11/numpy.h is needed for the specializations of py::format_descriptor
+// for enum types, see https://github.com/pybind/pybind11/issues/2135
+#include <pybind11/numpy.h>
+
 #include "../tnl_indexing.h"
 
 #include <TNL/Containers/Array.h>
 
-
-// pybind11 should actually take care of this inside py::format_descriptor, but apparently it does not work...
-// see https://github.com/pybind/pybind11/issues/2135
-template< typename T, typename = void >
-struct underlying_type
-{
-   using type = T;
-};
-template< typename T >
-struct underlying_type< T, std::enable_if_t< std::is_enum< T >::value > >
-{
-   using type = std::underlying_type_t< T >;
-};
-
-
 template< typename ArrayType >
 void export_Array(py::module & m, const char* name)
 {
@@ -64,9 +53,7 @@ void export_Array(py::module & m, const char* name)
                 // Size of one scalar
                 sizeof( typename ArrayType::ValueType ),
                 // Python struct-style format descriptor
-                py::format_descriptor<
-                     typename underlying_type< typename ArrayType::ValueType >::type
-                >::format(),
+                py::format_descriptor< typename ArrayType::ValueType >::format(),
                 // Number of dimensions
                 1,
                 // Buffer dimensions
-- 
GitLab