From d712846b164965fbd9a2a57a87a116fe3e7aae2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Tue, 7 Jul 2020 18:29:12 +0200 Subject: [PATCH 01/15] Cleaned up redundant code in ndarray tests --- .../DistributedNDArrayOverlaps_1D_test.h | 60 +------------------ .../DistributedNDArrayOverlaps_semi1D_test.h | 60 +------------------ .../ndarray/DistributedNDArray_1D_test.h | 60 +------------------ .../ndarray/DistributedNDArray_semi1D_test.h | 60 +------------------ .../Containers/ndarray/NDArrayTest.h | 14 ++--- .../Containers/ndarray/NDSubarrayTest.cpp | 14 ++--- .../Containers/ndarray/SlicedNDArrayTest.cpp | 14 ++--- .../ndarray/StaticNDArrayCudaTest.cu | 14 ++--- .../Containers/ndarray/StaticNDArrayTest.cpp | 14 ++--- 9 files changed, 24 insertions(+), 286 deletions(-) diff --git a/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_1D_test.h b/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_1D_test.h index a7609ee74..7377cbff2 100644 --- a/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_1D_test.h +++ b/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_1D_test.h @@ -308,62 +308,4 @@ TYPED_TEST( DistributedNDArrayOverlaps_1D_test, synchronize ) #endif // HAVE_GTEST -#if (defined(HAVE_GTEST) && defined(HAVE_MPI)) -using CommunicatorType = Communicators::MpiCommunicator; - -#include - -class MinimalistBufferedPrinter -: public ::testing::EmptyTestEventListener -{ -private: - std::stringstream sout; - -public: - // Called before a test starts. - virtual void OnTestStart(const ::testing::TestInfo& test_info) - { - sout << test_info.test_case_name() << "." << test_info.name() << " Start." << std::endl; - } - - // Called after a failed assertion or a SUCCEED() invocation. - virtual void OnTestPartResult(const ::testing::TestPartResult& test_part_result) - { - sout << (test_part_result.failed() ? 
"====Failure=== " : "===Success=== ") - << test_part_result.file_name() << " " - << test_part_result.line_number() <listeners(); - - delete listeners.Release(listeners.default_result_printer()); - listeners.Append(new MinimalistBufferedPrinter); - - Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv); - #endif - return RUN_ALL_TESTS(); -#else - throw GtestMissingError(); -#endif -} +#include "../../main_mpi.h" diff --git a/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_semi1D_test.h b/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_semi1D_test.h index a7f28ead5..f1ac970eb 100644 --- a/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_semi1D_test.h +++ b/src/UnitTests/Containers/ndarray/DistributedNDArrayOverlaps_semi1D_test.h @@ -348,62 +348,4 @@ void test_helper_synchronize( DistributedArray& a, const int rank, const int npr #endif // HAVE_GTEST -#if (defined(HAVE_GTEST) && defined(HAVE_MPI)) -using CommunicatorType = Communicators::MpiCommunicator; - -#include - -class MinimalistBufferedPrinter -: public ::testing::EmptyTestEventListener -{ -private: - std::stringstream sout; - -public: - // Called before a test starts. - virtual void OnTestStart(const ::testing::TestInfo& test_info) - { - sout << test_info.test_case_name() << "." << test_info.name() << " Start." << std::endl; - } - - // Called after a failed assertion or a SUCCEED() invocation. - virtual void OnTestPartResult(const ::testing::TestPartResult& test_part_result) - { - sout << (test_part_result.failed() ? 
"====Failure=== " : "===Success=== ") - << test_part_result.file_name() << " " - << test_part_result.line_number() <listeners(); - - delete listeners.Release(listeners.default_result_printer()); - listeners.Append(new MinimalistBufferedPrinter); - - Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv); - #endif - return RUN_ALL_TESTS(); -#else - throw GtestMissingError(); -#endif -} +#include "../../main_mpi.h" diff --git a/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h b/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h index 04afb91a4..a8d3bcdab 100644 --- a/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h +++ b/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h @@ -495,62 +495,4 @@ TYPED_TEST( DistributedNDArray_1D_test, forOverlaps ) #endif // HAVE_GTEST -#if (defined(HAVE_GTEST) && defined(HAVE_MPI)) -using CommunicatorType = Communicators::MpiCommunicator; - -#include - -class MinimalistBufferedPrinter -: public ::testing::EmptyTestEventListener -{ -private: - std::stringstream sout; - -public: - // Called before a test starts. - virtual void OnTestStart(const ::testing::TestInfo& test_info) - { - sout << test_info.test_case_name() << "." << test_info.name() << " Start." << std::endl; - } - - // Called after a failed assertion or a SUCCEED() invocation. - virtual void OnTestPartResult(const ::testing::TestPartResult& test_part_result) - { - sout << (test_part_result.failed() ? 
"====Failure=== " : "===Success=== ") - << test_part_result.file_name() << " " - << test_part_result.line_number() <listeners(); - - delete listeners.Release(listeners.default_result_printer()); - listeners.Append(new MinimalistBufferedPrinter); - - Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv); - #endif - return RUN_ALL_TESTS(); -#else - throw GtestMissingError(); -#endif -} +#include "../../main_mpi.h" diff --git a/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h b/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h index 17108509d..6f777c215 100644 --- a/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h +++ b/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h @@ -480,62 +480,4 @@ TYPED_TEST( DistributedNDArray_semi1D_test, forOverlaps ) #endif // HAVE_GTEST -#if (defined(HAVE_GTEST) && defined(HAVE_MPI)) -using CommunicatorType = Communicators::MpiCommunicator; - -#include - -class MinimalistBufferedPrinter -: public ::testing::EmptyTestEventListener -{ -private: - std::stringstream sout; - -public: - // Called before a test starts. - virtual void OnTestStart(const ::testing::TestInfo& test_info) - { - sout << test_info.test_case_name() << "." << test_info.name() << " Start." << std::endl; - } - - // Called after a failed assertion or a SUCCEED() invocation. - virtual void OnTestPartResult(const ::testing::TestPartResult& test_part_result) - { - sout << (test_part_result.failed() ? 
"====Failure=== " : "===Success=== ") - << test_part_result.file_name() << " " - << test_part_result.line_number() <listeners(); - - delete listeners.Release(listeners.default_result_printer()); - listeners.Append(new MinimalistBufferedPrinter); - - Communicators::ScopedInitializer< CommunicatorType > mpi(argc, argv); - #endif - return RUN_ALL_TESTS(); -#else - throw GtestMissingError(); -#endif -} +#include "../../main_mpi.h" diff --git a/src/UnitTests/Containers/ndarray/NDArrayTest.h b/src/UnitTests/Containers/ndarray/NDArrayTest.h index 1e5d9a30c..3d2c85888 100644 --- a/src/UnitTests/Containers/ndarray/NDArrayTest.h +++ b/src/UnitTests/Containers/ndarray/NDArrayTest.h @@ -1,3 +1,4 @@ +#ifdef HAVE_GTEST #include "gtest/gtest.h" #include @@ -1325,14 +1326,7 @@ TEST( NDArrayTest, forBoundary_static_6D ) << "i = " << i << ", j = " << j << ", k = " << k << ", l = " << l << ", m = " << m << ", n = " << n; } } +#endif // HAVE_GTEST -//#include "GtestMissingError.h" -int main( int argc, char* argv[] ) -{ -//#ifdef HAVE_GTEST - ::testing::InitGoogleTest( &argc, argv ); - return RUN_ALL_TESTS(); -//#else -// throw GtestMissingError(); -//#endif -} + +#include "../../main.h" diff --git a/src/UnitTests/Containers/ndarray/NDSubarrayTest.cpp b/src/UnitTests/Containers/ndarray/NDSubarrayTest.cpp index 1b57eed28..f4f44003f 100644 --- a/src/UnitTests/Containers/ndarray/NDSubarrayTest.cpp +++ b/src/UnitTests/Containers/ndarray/NDSubarrayTest.cpp @@ -1,3 +1,4 @@ +#ifdef HAVE_GTEST #include "gtest/gtest.h" #include @@ -392,14 +393,7 @@ TEST( NDArraySubarrayTest, Dynamic_6D ) } a.setValue( 0 ); } +#endif // HAVE_GTEST -//#include "GtestMissingError.h" -int main( int argc, char* argv[] ) -{ -//#ifdef HAVE_GTEST - ::testing::InitGoogleTest( &argc, argv ); - return RUN_ALL_TESTS(); -//#else -// throw GtestMissingError(); -//#endif -} + +#include "../../main.h" diff --git a/src/UnitTests/Containers/ndarray/SlicedNDArrayTest.cpp b/src/UnitTests/Containers/ndarray/SlicedNDArrayTest.cpp 
index 8574a5602..eda47a615 100644 --- a/src/UnitTests/Containers/ndarray/SlicedNDArrayTest.cpp +++ b/src/UnitTests/Containers/ndarray/SlicedNDArrayTest.cpp @@ -1,3 +1,4 @@ +#ifdef HAVE_GTEST #include "gtest/gtest.h" #include @@ -238,14 +239,7 @@ TEST( SlicedNDArrayTest, CopySemantics ) EXPECT_EQ( a.getConstView(), c.getConstView() ); EXPECT_EQ( a.getConstView(), c_view.getConstView() ); } +#endif // HAVE_GTEST -//#include "GtestMissingError.h" -int main( int argc, char* argv[] ) -{ -//#ifdef HAVE_GTEST - ::testing::InitGoogleTest( &argc, argv ); - return RUN_ALL_TESTS(); -//#else -// throw GtestMissingError(); -//#endif -} + +#include "../../main.h" diff --git a/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu b/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu index 5a0561955..f2c83ba16 100644 --- a/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu +++ b/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu @@ -1,3 +1,4 @@ +#ifdef HAVE_GTEST #include "gtest/gtest.h" #include @@ -76,14 +77,7 @@ TEST( StaticNDArrayCudaTest, CopyFromArray ) { __test_CopyFromArray(); } +#endif // HAVE_GTEST -//#include "GtestMissingError.h" -int main( int argc, char* argv[] ) -{ -//#ifdef HAVE_GTEST - ::testing::InitGoogleTest( &argc, argv ); - return RUN_ALL_TESTS(); -//#else -// throw GtestMissingError(); -//#endif -} + +#include "../../main.h" diff --git a/src/UnitTests/Containers/ndarray/StaticNDArrayTest.cpp b/src/UnitTests/Containers/ndarray/StaticNDArrayTest.cpp index e3ea290f2..23ffd8066 100644 --- a/src/UnitTests/Containers/ndarray/StaticNDArrayTest.cpp +++ b/src/UnitTests/Containers/ndarray/StaticNDArrayTest.cpp @@ -1,3 +1,4 @@ +#ifdef HAVE_GTEST #include "gtest/gtest.h" #include @@ -92,14 +93,7 @@ TEST( StaticNDArrayTest, CopySemantics ) EXPECT_EQ( a.getConstView(), c.getConstView() ); EXPECT_EQ( a.getConstView(), c_view.getConstView() ); } +#endif // HAVE_GTEST -//#include "GtestMissingError.h" -int main( int argc, char* argv[] ) -{ -//#ifdef 
HAVE_GTEST - ::testing::InitGoogleTest( &argc, argv ); - return RUN_ALL_TESTS(); -//#else -// throw GtestMissingError(); -//#endif -} + +#include "../../main.h" -- GitLab From a50188ff1b3fdad6809c201e67f9210662dc882e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Tue, 7 Jul 2020 23:53:36 +0200 Subject: [PATCH 02/15] Tests: added overloads for the GTest's PrintTo function to force the use of our operator<< for vectors --- src/UnitTests/GtestPrintToOverrides.h | 35 +++++++++++++++++++++++++++ src/UnitTests/main.h | 1 + src/UnitTests/main_mpi.h | 1 + 3 files changed, 37 insertions(+) create mode 100644 src/UnitTests/GtestPrintToOverrides.h diff --git a/src/UnitTests/GtestPrintToOverrides.h b/src/UnitTests/GtestPrintToOverrides.h new file mode 100644 index 000000000..ed545890e --- /dev/null +++ b/src/UnitTests/GtestPrintToOverrides.h @@ -0,0 +1,35 @@ +#pragma once + +// Overrides due to GTest's fuckup... +// https://stackoverflow.com/a/25265174 + +#include +#include +#include + +namespace TNL { +namespace Containers { + +template< typename Value, typename Device, typename Index, typename Allocator > +void PrintTo( const Vector< Value, Device, Index, Allocator >& vec, + std::ostream *str ) +{ + *str << vec; +} + +template< typename Value, typename Device, typename Index > +void PrintTo( const VectorView< Value, Device, Index >& vec, + std::ostream *str ) +{ + *str << vec; +} + +template< int Size, typename Value > +void PrintTo( const StaticVector< Size, Value >& vec, + std::ostream *str ) +{ + *str << vec; +} + +} // namespace Containers +} // namespace TNL diff --git a/src/UnitTests/main.h b/src/UnitTests/main.h index 87da37fa8..00cb11be6 100644 --- a/src/UnitTests/main.h +++ b/src/UnitTests/main.h @@ -1,5 +1,6 @@ #ifdef HAVE_GTEST #include +#include "GtestPrintToOverrides.h" #else #include "GtestMissingError.h" #endif diff --git a/src/UnitTests/main_mpi.h b/src/UnitTests/main_mpi.h index 3c7c39a86..9fe75c850 100644 --- 
a/src/UnitTests/main_mpi.h +++ b/src/UnitTests/main_mpi.h @@ -1,5 +1,6 @@ #ifdef HAVE_GTEST #include +#include "GtestPrintToOverrides.h" #else #include "GtestMissingError.h" #endif -- GitLab From c53284ebe77d4d046e922bbb83a1f5c300e524d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Tue, 7 Jul 2020 11:17:21 +0200 Subject: [PATCH 03/15] CMakeLists.txt: enabled address and undefined sanitizers for Debug builds --- CMakeLists.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index c9e570042..27451ef82 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,6 +127,16 @@ if( CMAKE_CXX_COMPILER_ID STREQUAL "Clang" ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-self-assign-overloaded" ) endif() +# enable address sanitizer (does not work with MPI due to many false positives, does not work with nvcc at all) +if( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" ) + if( NOT ${WITH_MPI} AND NOT ${WITH_CUDA} ) + set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address -fsanitize=undefined -fno-omit-frame-pointer" ) + set( CMAKE_SHARED_LIBRARY_LINK_C_FLAGS_DEBUG "${CMAKE_SHARED_LIBRARY_LINK_C_FLAGS_DEBUG} -fsanitize=address -fsanitize=undefined" ) + set( CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} -fsanitize=address -fsanitize=undefined" ) + set( CMAKE_SHARED_LINKER_FLAGS_DEBUG "${CMAKE_SHARED_LINKER_FLAGS_DEBUG} -fsanitize=address -fsanitize=undefined" ) + endif() +endif() + # enable link time optimizations (but not in continuous integration) if( NOT DEFINED ENV{CI_JOB_NAME} ) if( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" ) -- GitLab From f1d3d92a9b1c1e4ddf8508a55048586af0ba4a26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Mon, 6 Jul 2020 16:59:11 +0200 Subject: [PATCH 04/15] Added OperandMemberType to avoid storing const-reference to temporary objects created in subexpressions --- 
.../Expressions/HorizontalOperations.h | 2 +- .../Expressions/StaticExpressionTemplates.h | 14 ++++----- src/TNL/Containers/Expressions/TypeTraits.h | 31 +++++++++++++++++-- 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/src/TNL/Containers/Expressions/HorizontalOperations.h b/src/TNL/Containers/Expressions/HorizontalOperations.h index cb49f7d47..614f2c878 100644 --- a/src/TNL/Containers/Expressions/HorizontalOperations.h +++ b/src/TNL/Containers/Expressions/HorizontalOperations.h @@ -60,7 +60,7 @@ struct Min { template< typename T1, typename T2 > __cuda_callable__ - static auto evaluate( const T1& a, const T2& b ) -> decltype( min( a , b ) ) + static auto evaluate( const T1& a, const T2& b ) -> decltype( min( a, b ) ) { return min( a, b ); } diff --git a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h index 89e13d7f1..9ae232a06 100644 --- a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h @@ -103,8 +103,8 @@ struct StaticBinaryExpressionTemplate< T1, T2, Operation, VectorExpressionVariab } protected: - const T1& op1; - const T2& op2; + typename OperandMemberType< T1 >::type op1; + typename OperandMemberType< T2 >::type op2; }; template< typename T1, @@ -151,8 +151,8 @@ struct StaticBinaryExpressionTemplate< T1, T2, Operation, VectorExpressionVariab } protected: - const T1& op1; - const T2 op2; + typename OperandMemberType< T1 >::type op1; + typename OperandMemberType< T2 >::type op2; }; template< typename T1, @@ -199,8 +199,8 @@ struct StaticBinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, Ve } protected: - const T1 op1; - const T2& op2; + typename OperandMemberType< T1 >::type op1; + typename OperandMemberType< T2 >::type op2; }; //// @@ -248,7 +248,7 @@ struct StaticUnaryExpressionTemplate } protected: - const T1& operand; + typename OperandMemberType< T1 >::type operand; }; #ifndef 
DOXYGEN_ONLY diff --git a/src/TNL/Containers/Expressions/TypeTraits.h b/src/TNL/Containers/Expressions/TypeTraits.h index 1024bb465..9a0570345 100644 --- a/src/TNL/Containers/Expressions/TypeTraits.h +++ b/src/TNL/Containers/Expressions/TypeTraits.h @@ -103,22 +103,47 @@ struct IsArithmeticSubtype< T, V, false > // helper trait class (used in unit tests) -template< typename R, bool enabled = ! HasEnabledStaticExpressionTemplates< R >::value > +template +struct enable_if_type { typedef R type; }; + +template< typename R, typename Enable = void > struct RemoveExpressionTemplate { using type = R; }; template< typename R > -struct RemoveExpressionTemplate< R, false > +struct RemoveExpressionTemplate< R, typename enable_if_type< typename R::VectorOperandType >::type > { -// using type = StaticVector< R::getSize(), typename RemoveExpressionTemplate< typename R::RealType >::type >; using type = typename RemoveExpressionTemplate< typename R::VectorOperandType >::type; }; template< typename R > using RemoveET = typename RemoveExpressionTemplate< R >::type; +// helper trait class for Static*ExpressionTemplates classes +template< typename R, typename Enable = void > +struct OperandMemberType +{ + using type = std::conditional_t< std::is_fundamental< R >::value, + // non-reference for fundamental types + std::add_const_t< std::remove_reference_t< R > >, + // lvalue-reference for other types (especially StaticVector) + std::add_lvalue_reference_t< std::add_const_t< R > > + >; +// using type = std::add_const_t< std::remove_reference_t< R > >; +}; + +// assuming that only the StaticBinaryExpressionTemplate and StaticUnaryTemplate classes have a VectorOperandType type member +template< typename R > +struct OperandMemberType< R, typename enable_if_type< typename R::VectorOperandType >::type > +{ + // non-reference for StaticBinaryExpressionTemplate and StaticUnaryExpressionTemplate + // (otherwise we would get segfaults - binding const-reference to temporary 
Static*ExpressionTemplate + // objects does not work as expected...) + using type = std::add_const_t< std::remove_reference_t< R > >; +}; + } // namespace Expressions } // namespace Containers } // namespace TNL -- GitLab From 43597c11c81552075b50b755d3dc36a2164de901 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Mon, 6 Jul 2020 16:59:49 +0200 Subject: [PATCH 05/15] Wrapped the result type of vertical operations with RemoveET --- .../Expressions/VerticalOperations.h | 49 +++++++++++-------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/src/TNL/Containers/Expressions/VerticalOperations.h b/src/TNL/Containers/Expressions/VerticalOperations.h index 63b339cb8..6e429414e 100644 --- a/src/TNL/Containers/Expressions/VerticalOperations.h +++ b/src/TNL/Containers/Expressions/VerticalOperations.h @@ -14,6 +14,7 @@ #include #include +#include //// // By vertical operations we mean those applied across vector elements or @@ -26,9 +27,10 @@ namespace Expressions { //// // Vertical operations template< typename Expression > -auto ExpressionMin( const Expression& expression ) -> std::decay_t< decltype( expression[0] ) > +auto ExpressionMin( const Expression& expression ) +-> RemoveET< std::decay_t< decltype( expression[0] ) > > { - using ResultType = std::decay_t< decltype( expression[0] ) >; + using ResultType = RemoveET< std::decay_t< decltype( expression[0] ) > >; using IndexType = typename Expression::IndexType; const auto view = expression.getConstView(); @@ -39,9 +41,9 @@ auto ExpressionMin( const Expression& expression ) -> std::decay_t< decltype( ex template< typename Expression > auto ExpressionArgMin( const Expression& expression ) --> std::pair< std::decay_t< decltype( expression[0] ) >, typename Expression::IndexType > +-> RemoveET< std::pair< std::decay_t< decltype( expression[0] ) >, typename Expression::IndexType > > { - using ResultType = std::decay_t< decltype( expression[0] ) >; + using ResultType = RemoveET< 
std::decay_t< decltype( expression[0] ) > >; using IndexType = typename Expression::IndexType; const auto view = expression.getConstView(); @@ -58,9 +60,10 @@ auto ExpressionArgMin( const Expression& expression ) } template< typename Expression > -auto ExpressionMax( const Expression& expression ) -> std::decay_t< decltype( expression[0] ) > +auto ExpressionMax( const Expression& expression ) +-> RemoveET< std::decay_t< decltype( expression[0] ) > > { - using ResultType = std::decay_t< decltype( expression[0] ) >; + using ResultType = RemoveET< std::decay_t< decltype( expression[0] ) > >; using IndexType = typename Expression::IndexType; const auto view = expression.getConstView(); @@ -71,9 +74,9 @@ auto ExpressionMax( const Expression& expression ) -> std::decay_t< decltype( ex template< typename Expression > auto ExpressionArgMax( const Expression& expression ) --> std::pair< std::decay_t< decltype( expression[0] ) >, typename Expression::IndexType > +-> RemoveET< std::pair< std::decay_t< decltype( expression[0] ) >, typename Expression::IndexType > > { - using ResultType = std::decay_t< decltype( expression[0] ) >; + using ResultType = RemoveET< std::decay_t< decltype( expression[0] ) > >; using IndexType = typename Expression::IndexType; const auto view = expression.getConstView(); @@ -90,9 +93,10 @@ auto ExpressionArgMax( const Expression& expression ) } template< typename Expression > -auto ExpressionSum( const Expression& expression ) -> std::decay_t< decltype( expression[0] + expression[0] ) > +auto ExpressionSum( const Expression& expression ) +-> RemoveET< std::decay_t< decltype( expression[0] + expression[0] ) > > { - using ResultType = std::decay_t< decltype( expression[0] + expression[0] ) >; + using ResultType = RemoveET< std::decay_t< decltype( expression[0] + expression[0] ) > >; using IndexType = typename Expression::IndexType; const auto view = expression.getConstView(); @@ -101,9 +105,10 @@ auto ExpressionSum( const Expression& expression ) -> 
std::decay_t< decltype( ex } template< typename Expression > -auto ExpressionProduct( const Expression& expression ) -> std::decay_t< decltype( expression[0] * expression[0] ) > +auto ExpressionProduct( const Expression& expression ) +-> RemoveET< std::decay_t< decltype( expression[0] * expression[0] ) > > { - using ResultType = std::decay_t< decltype( expression[0] * expression[0] ) >; + using ResultType = RemoveET< std::decay_t< decltype( expression[0] * expression[0] ) > >; using IndexType = typename Expression::IndexType; const auto view = expression.getConstView(); @@ -112,9 +117,10 @@ auto ExpressionProduct( const Expression& expression ) -> std::decay_t< decltype } template< typename Expression > -auto ExpressionLogicalAnd( const Expression& expression ) -> std::decay_t< decltype( expression[0] && expression[0] ) > +auto ExpressionLogicalAnd( const Expression& expression ) +-> RemoveET< std::decay_t< decltype( expression[0] && expression[0] ) > > { - using ResultType = std::decay_t< decltype( expression[0] && expression[0] ) >; + using ResultType = RemoveET< std::decay_t< decltype( expression[0] && expression[0] ) > >; using IndexType = typename Expression::IndexType; const auto view = expression.getConstView(); @@ -123,9 +129,10 @@ auto ExpressionLogicalAnd( const Expression& expression ) -> std::decay_t< declt } template< typename Expression > -auto ExpressionLogicalOr( const Expression& expression ) -> std::decay_t< decltype( expression[0] || expression[0] ) > +auto ExpressionLogicalOr( const Expression& expression ) +-> RemoveET< std::decay_t< decltype( expression[0] || expression[0] ) > > { - using ResultType = std::decay_t< decltype( expression[0] || expression[0] ) >; + using ResultType = RemoveET< std::decay_t< decltype( expression[0] || expression[0] ) > >; using IndexType = typename Expression::IndexType; const auto view = expression.getConstView(); @@ -134,9 +141,10 @@ auto ExpressionLogicalOr( const Expression& expression ) -> std::decay_t< 
declty } template< typename Expression > -auto ExpressionBinaryAnd( const Expression& expression ) -> std::decay_t< decltype( expression[0] & expression[0] ) > +auto ExpressionBinaryAnd( const Expression& expression ) +-> RemoveET< std::decay_t< decltype( expression[0] & expression[0] ) > > { - using ResultType = std::decay_t< decltype( expression[0] & expression[0] ) >; + using ResultType = RemoveET< std::decay_t< decltype( expression[0] & expression[0] ) > >; using IndexType = typename Expression::IndexType; const auto view = expression.getConstView(); @@ -145,9 +153,10 @@ auto ExpressionBinaryAnd( const Expression& expression ) -> std::decay_t< declty } template< typename Expression > -auto ExpressionBinaryOr( const Expression& expression ) -> std::decay_t< decltype( expression[0] | expression[0] ) > +auto ExpressionBinaryOr( const Expression& expression ) +-> RemoveET< std::decay_t< decltype( expression[0] | expression[0] ) > > { - using ResultType = std::decay_t< decltype( expression[0] | expression[0] ) >; + using ResultType = RemoveET< std::decay_t< decltype( expression[0] | expression[0] ) > >; using IndexType = typename Expression::IndexType; const auto view = expression.getConstView(); -- GitLab From 2df931ad3e5c30e7442b8cf53e8f18233e6fb6fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Tue, 7 Jul 2020 13:58:02 +0200 Subject: [PATCH 06/15] Improved type traits to work even with reference types --- .../Expressions/ExpressionVariableType.h | 2 +- src/TNL/Containers/Expressions/TypeTraits.h | 46 +++++++++++-------- src/TNL/TypeTraits.h | 16 +++---- 3 files changed, 36 insertions(+), 28 deletions(-) diff --git a/src/TNL/Containers/Expressions/ExpressionVariableType.h b/src/TNL/Containers/Expressions/ExpressionVariableType.h index 2095a4352..10acb65cc 100644 --- a/src/TNL/Containers/Expressions/ExpressionVariableType.h +++ b/src/TNL/Containers/Expressions/ExpressionVariableType.h @@ -22,7 +22,7 @@ template< typename T, typename V = T 
> constexpr ExpressionVariableType getExpressionVariableType() { - if( std::is_arithmetic< std::decay_t< T > >::value ) + if( std::is_arithmetic< T >::value ) return ArithmeticVariable; // vectors must be considered as an arithmetic type when used as RealType in another vector if( IsArithmeticSubtype< T, V >::value ) diff --git a/src/TNL/Containers/Expressions/TypeTraits.h b/src/TNL/Containers/Expressions/TypeTraits.h index 9a0570345..5df97785e 100644 --- a/src/TNL/Containers/Expressions/TypeTraits.h +++ b/src/TNL/Containers/Expressions/TypeTraits.h @@ -33,47 +33,55 @@ struct HasEnabledDistributedExpressionTemplates : std::false_type // type aliases for enabling specific operators and functions using SFINAE template< typename ET1 > using EnableIfStaticUnaryExpression_t = std::enable_if_t< - HasEnabledStaticExpressionTemplates< ET1 >::value >; + HasEnabledStaticExpressionTemplates< std::decay_t< ET1 > >::value >; template< typename ET1, typename ET2 > using EnableIfStaticBinaryExpression_t = std::enable_if_t< - HasEnabledStaticExpressionTemplates< ET1 >::value || - HasEnabledStaticExpressionTemplates< ET2 >::value >; + ( + HasEnabledStaticExpressionTemplates< std::decay_t< ET1 > >::value || + HasEnabledStaticExpressionTemplates< std::decay_t< ET2 > >::value + ) && ! + ( + HasEnabledExpressionTemplates< std::decay_t< ET2 > >::value || + HasEnabledExpressionTemplates< std::decay_t< ET1 > >::value || + HasEnabledDistributedExpressionTemplates< std::decay_t< ET2 > >::value || + HasEnabledDistributedExpressionTemplates< std::decay_t< ET1 > >::value + ) >; template< typename ET1 > using EnableIfUnaryExpression_t = std::enable_if_t< - HasEnabledExpressionTemplates< ET1 >::value >; + HasEnabledExpressionTemplates< std::decay_t< ET1 > >::value >; template< typename ET1, typename ET2 > using EnableIfBinaryExpression_t = std::enable_if_t< // we need to avoid ambiguity with operators defined in Array (e.g. 
Array::operator==) // so the first operand must not be Array ( - HasAddAssignmentOperator< ET1 >::value || - HasEnabledExpressionTemplates< ET1 >::value || - std::is_arithmetic< ET1 >::value + HasAddAssignmentOperator< std::decay_t< ET1 > >::value || + HasEnabledExpressionTemplates< std::decay_t< ET1 > >::value || + std::is_arithmetic< std::decay_t< ET1 > >::value ) && ( - HasEnabledExpressionTemplates< ET2 >::value || - HasEnabledExpressionTemplates< ET1 >::value + HasEnabledExpressionTemplates< std::decay_t< ET2 > >::value || + HasEnabledExpressionTemplates< std::decay_t< ET1 > >::value ) >; template< typename ET1 > using EnableIfDistributedUnaryExpression_t = std::enable_if_t< - HasEnabledDistributedExpressionTemplates< ET1 >::value >; + HasEnabledDistributedExpressionTemplates< std::decay_t< ET1 > >::value >; template< typename ET1, typename ET2 > using EnableIfDistributedBinaryExpression_t = std::enable_if_t< // we need to avoid ambiguity with operators defined in Array (e.g. Array::operator==) // so the first operand must not be Array ( - HasAddAssignmentOperator< ET1 >::value || - HasEnabledDistributedExpressionTemplates< ET1 >::value || - std::is_arithmetic< ET1 >::value + HasAddAssignmentOperator< std::decay_t< ET1 > >::value || + HasEnabledDistributedExpressionTemplates< std::decay_t< ET1 > >::value || + std::is_arithmetic< std::decay_t< ET1 > >::value ) && ( - HasEnabledDistributedExpressionTemplates< ET2 >::value || - HasEnabledDistributedExpressionTemplates< ET1 >::value + HasEnabledDistributedExpressionTemplates< std::decay_t< ET2 > >::value || + HasEnabledDistributedExpressionTemplates< std::decay_t< ET1 > >::value ) >; @@ -83,7 +91,7 @@ template< typename T, typename V, struct IsArithmeticSubtype : public std::integral_constant< bool, // TODO: use std::is_assignable? 
- std::is_same< T, typename V::RealType >::value > + std::is_same< T, typename std::decay_t< V >::RealType >::value > {}; template< typename T > @@ -109,13 +117,13 @@ struct enable_if_type { typedef R type; }; template< typename R, typename Enable = void > struct RemoveExpressionTemplate { - using type = R; + using type = std::decay_t< R >; }; template< typename R > -struct RemoveExpressionTemplate< R, typename enable_if_type< typename R::VectorOperandType >::type > +struct RemoveExpressionTemplate< R, typename enable_if_type< typename std::decay_t< R >::VectorOperandType >::type > { - using type = typename RemoveExpressionTemplate< typename R::VectorOperandType >::type; + using type = typename RemoveExpressionTemplate< typename std::decay_t< R >::VectorOperandType >::type; }; template< typename R > diff --git a/src/TNL/TypeTraits.h b/src/TNL/TypeTraits.h index 3032a0b0d..2afda7aad 100644 --- a/src/TNL/TypeTraits.h +++ b/src/TNL/TypeTraits.h @@ -29,7 +29,7 @@ private: template< typename C > static NoType& test(...); public: - static constexpr bool value = ( sizeof( test< T >(0) ) == sizeof( YesType ) ); + static constexpr bool value = ( sizeof( test< std::decay_t >(0) ) == sizeof( YesType ) ); }; /** @@ -46,7 +46,7 @@ private: template< typename C > static NoType& test(...); public: - static constexpr bool value = ( sizeof( test< T >(0) ) == sizeof( YesType ) ); + static constexpr bool value = ( sizeof( test< std::decay_t >(0) ) == sizeof( YesType ) ); }; /** @@ -70,7 +70,7 @@ private: template< typename > static constexpr std::false_type check(...); - using type = decltype(check(0)); + using type = decltype(check>(0)); public: static constexpr bool value = type::value; @@ -97,7 +97,7 @@ private: template< typename > static constexpr std::false_type check(...); - using type = decltype(check(0)); + using type = decltype(check>(0)); public: static constexpr bool value = type::value; @@ -124,7 +124,7 @@ private: template< typename > static constexpr std::false_type 
check(...); - using type = decltype(check<T>(0)); + using type = decltype(check<std::decay_t<T>>(0)); public: static constexpr bool value = type::value; @@ -188,7 +188,7 @@ private: template< typename M, M method > static constexpr std::false_type is_constexpr_impl(...); - using type = decltype(is_constexpr_impl< decltype(&T::getSize), &T::getSize >(0)); + using type = decltype(is_constexpr_impl< decltype(&std::decay_t<T>::getSize), &std::decay_t<T>::getSize >(0)); }; // specialization for types which don't have getSize() method at all @@ -223,7 +223,7 @@ struct IsStaticArrayType template< typename T > struct IsViewType : public std::integral_constant< bool, - std::is_same< typename T::ViewType, T >::value > + std::is_same< typename std::decay_t<T>::ViewType, T >::value > {}; /** @@ -247,7 +247,7 @@ private: template< typename > static constexpr std::false_type check(...); - using type = decltype(check<T>(0)); + using type = decltype(check<std::decay_t<T>>(0)); public: static constexpr bool value = type::value; -- GitLab From ee68bdc95593807f40ca269535f17baa0efaba98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Wed, 8 Jul 2020 15:45:43 +0200 Subject: [PATCH 07/15] Added static asserts for std::numeric_limits::is_specialized to vector reductions --- .../Expressions/DistributedVerticalOperations.h | 12 ++++++++++++ src/TNL/Containers/Expressions/VerticalOperations.h | 12 ++++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/TNL/Containers/Expressions/DistributedVerticalOperations.h b/src/TNL/Containers/Expressions/DistributedVerticalOperations.h index e92391a7d..43390b529 100644 --- a/src/TNL/Containers/Expressions/DistributedVerticalOperations.h +++ b/src/TNL/Containers/Expressions/DistributedVerticalOperations.h @@ -25,6 +25,8 @@ auto DistributedExpressionMin( const Expression& expression ) -> std::decay_t< d using ResultType = std::decay_t< decltype( expression[0] ) >; using CommunicatorType = typename Expression::CommunicatorType; + static_assert( std::numeric_limits<
ResultType >::is_specialized, + "std::numeric_limits is not specialized for the reduction's result type" ); ResultType result = std::numeric_limits< ResultType >::max(); if( expression.getCommunicationGroup() != CommunicatorType::NullGroup ) { const ResultType localResult = ExpressionMin( expression.getConstLocalView() ); @@ -42,6 +44,8 @@ auto DistributedExpressionArgMin( const Expression& expression ) using ResultType = std::pair< RealType, IndexType >; using CommunicatorType = typename Expression::CommunicatorType; + static_assert( std::numeric_limits< RealType >::is_specialized, + "std::numeric_limits is not specialized for the reduction's real type" ); ResultType result( -1, std::numeric_limits< RealType >::max() ); const auto group = expression.getCommunicationGroup(); if( group != CommunicatorType::NullGroup ) { @@ -82,6 +86,8 @@ auto DistributedExpressionMax( const Expression& expression ) -> std::decay_t< d using ResultType = std::decay_t< decltype( expression[0] ) >; using CommunicatorType = typename Expression::CommunicatorType; + static_assert( std::numeric_limits< ResultType >::is_specialized, + "std::numeric_limits is not specialized for the reduction's result type" ); ResultType result = std::numeric_limits< ResultType >::lowest(); if( expression.getCommunicationGroup() != CommunicatorType::NullGroup ) { const ResultType localResult = ExpressionMax( expression.getConstLocalView() ); @@ -99,6 +105,8 @@ auto DistributedExpressionArgMax( const Expression& expression ) using ResultType = std::pair< RealType, IndexType >; using CommunicatorType = typename Expression::CommunicatorType; + static_assert( std::numeric_limits< RealType >::is_specialized, + "std::numeric_limits is not specialized for the reduction's real type" ); ResultType result( -1, std::numeric_limits< RealType >::lowest() ); const auto group = expression.getCommunicationGroup(); if( group != CommunicatorType::NullGroup ) { @@ -168,6 +176,8 @@ auto DistributedExpressionLogicalAnd( const 
Expression& expression ) -> std::dec using ResultType = std::decay_t< decltype( expression[0] && expression[0] ) >; using CommunicatorType = typename Expression::CommunicatorType; + static_assert( std::numeric_limits< ResultType >::is_specialized, + "std::numeric_limits is not specialized for the reduction's result type" ); ResultType result = std::numeric_limits< ResultType >::max(); if( expression.getCommunicationGroup() != CommunicatorType::NullGroup ) { const ResultType localResult = ExpressionLogicalAnd( expression.getConstLocalView() ); @@ -196,6 +206,8 @@ auto DistributedExpressionBinaryAnd( const Expression& expression ) -> std::deca using ResultType = std::decay_t< decltype( expression[0] & expression[0] ) >; using CommunicatorType = typename Expression::CommunicatorType; + static_assert( std::numeric_limits< ResultType >::is_specialized, + "std::numeric_limits is not specialized for the reduction's result type" ); ResultType result = std::numeric_limits< ResultType >::max(); if( expression.getCommunicationGroup() != CommunicatorType::NullGroup ) { const ResultType localResult = ExpressionLogicalBinaryAnd( expression.getConstLocalView() ); diff --git a/src/TNL/Containers/Expressions/VerticalOperations.h b/src/TNL/Containers/Expressions/VerticalOperations.h index 6e429414e..68360495c 100644 --- a/src/TNL/Containers/Expressions/VerticalOperations.h +++ b/src/TNL/Containers/Expressions/VerticalOperations.h @@ -36,6 +36,8 @@ auto ExpressionMin( const Expression& expression ) const auto view = expression.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; + static_assert( std::numeric_limits< ResultType >::is_specialized, + "std::numeric_limits is not specialized for the reduction's result type" ); return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), 
reduction, fetch, std::numeric_limits< ResultType >::max() ); } @@ -56,6 +58,8 @@ auto ExpressionArgMin( const Expression& expression ) else if( a == b && bIdx < aIdx ) aIdx = bIdx; }; + static_assert( std::numeric_limits< ResultType >::is_specialized, + "std::numeric_limits is not specialized for the reduction's result type" ); return Algorithms::Reduction< typename Expression::DeviceType >::reduceWithArgument( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); } @@ -69,6 +73,8 @@ auto ExpressionMax( const Expression& expression ) const auto view = expression.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); }; + static_assert( std::numeric_limits< ResultType >::is_specialized, + "std::numeric_limits is not specialized for the reduction's result type" ); return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); } @@ -89,6 +95,8 @@ auto ExpressionArgMax( const Expression& expression ) else if( a == b && bIdx < aIdx ) aIdx = bIdx; }; + static_assert( std::numeric_limits< ResultType >::is_specialized, + "std::numeric_limits is not specialized for the reduction's result type" ); return Algorithms::Reduction< typename Expression::DeviceType >::reduceWithArgument( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); } @@ -125,6 +133,8 @@ auto ExpressionLogicalAnd( const Expression& expression ) const auto view = expression.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; + static_assert( std::numeric_limits< ResultType >::is_specialized, + "std::numeric_limits is not specialized for the reduction's result type" ); return Algorithms::Reduction< typename 
Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), std::logical_and<>{}, fetch, std::numeric_limits< ResultType >::max() ); } @@ -149,6 +159,8 @@ auto ExpressionBinaryAnd( const Expression& expression ) const auto view = expression.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; + static_assert( std::numeric_limits< ResultType >::is_specialized, + "std::numeric_limits is not specialized for the reduction's result type" ); return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), std::bit_and<>{}, fetch, std::numeric_limits< ResultType >::max() ); } -- GitLab From c09870c5c37d00fc732558cabd1dcd06a55748a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Mon, 6 Jul 2020 17:02:50 +0200 Subject: [PATCH 08/15] Fixed tests for expression templates to work with nested vector types Tests for vertical operations which are not well defined for nested vectors are disabled. 
--- src/UnitTests/Containers/CMakeLists.txt | 23 +++++------ .../Containers/VectorBinaryOperationsTest.cpp | 1 + .../Containers/VectorBinaryOperationsTest.cu | 1 + .../Containers/VectorBinaryOperationsTest.h | 8 +--- .../Containers/VectorHelperFunctions.h | 32 +++++++++++++++ .../Containers/VectorUnaryOperationsTest.cpp | 1 + .../Containers/VectorUnaryOperationsTest.cu | 1 + .../Containers/VectorUnaryOperationsTest.h | 37 +++++++++-------- .../VectorVerticalOperationsTest.cpp | 1 + .../VectorVerticalOperationsTest.cu | 1 + .../Containers/VectorVerticalOperationsTest.h | 40 +++++++++++++------ 11 files changed, 98 insertions(+), 48 deletions(-) diff --git a/src/UnitTests/Containers/CMakeLists.txt b/src/UnitTests/Containers/CMakeLists.txt index 227a86551..21be3ded1 100644 --- a/src/UnitTests/Containers/CMakeLists.txt +++ b/src/UnitTests/Containers/CMakeLists.txt @@ -32,10 +32,9 @@ ADD_EXECUTABLE( VectorVerticalOperationsTest VectorVerticalOperationsTest.cpp ) TARGET_COMPILE_OPTIONS( VectorVerticalOperationsTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( VectorVerticalOperationsTest ${GTEST_BOTH_LIBRARIES} ) -# FIXME -#ADD_EXECUTABLE( VectorOfStaticVectorsTest VectorOfStaticVectorsTest.cpp ) -#TARGET_COMPILE_OPTIONS( VectorOfStaticVectorsTest PRIVATE ${CXX_TESTS_FLAGS} ) -#TARGET_LINK_LIBRARIES( VectorOfStaticVectorsTest ${GTEST_BOTH_LIBRARIES} ) +ADD_EXECUTABLE( VectorOfStaticVectorsTest VectorOfStaticVectorsTest.cpp ) +TARGET_COMPILE_OPTIONS( VectorOfStaticVectorsTest PRIVATE ${CXX_TESTS_FLAGS} ) +TARGET_LINK_LIBRARIES( VectorOfStaticVectorsTest ${GTEST_BOTH_LIBRARIES} ) IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( ArrayTestCuda ArrayTest.cu @@ -62,9 +61,8 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( VectorVerticalOperationsTestCuda VectorVerticalOperationsTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( VectorVerticalOperationsTestCuda ${GTEST_BOTH_LIBRARIES} ) - # FIXME -# CUDA_ADD_EXECUTABLE( VectorOfStaticVectorsTestCuda VectorOfStaticVectorsTest.cu 
OPTIONS ${CXX_TESTS_FLAGS} ) -# TARGET_LINK_LIBRARIES( VectorOfStaticVectorsTestCuda ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( VectorOfStaticVectorsTestCuda VectorOfStaticVectorsTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( VectorOfStaticVectorsTestCuda ${GTEST_BOTH_LIBRARIES} ) ENDIF( BUILD_CUDA ) ADD_EXECUTABLE( StaticArrayTest StaticArrayTest.cpp ) @@ -79,10 +77,9 @@ ADD_EXECUTABLE( StaticVectorOperationsTest StaticVectorOperationsTest.cpp ) TARGET_COMPILE_OPTIONS( StaticVectorOperationsTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( StaticVectorOperationsTest ${GTEST_BOTH_LIBRARIES} ) -# FIXME -#ADD_EXECUTABLE( StaticVectorOfStaticVectorsTest StaticVectorOfStaticVectorsTest.cpp ) -#TARGET_COMPILE_OPTIONS( StaticVectorOfStaticVectorsTest PRIVATE ${CXX_TESTS_FLAGS} ) -#TARGET_LINK_LIBRARIES( StaticVectorOfStaticVectorsTest ${GTEST_BOTH_LIBRARIES} ) +ADD_EXECUTABLE( StaticVectorOfStaticVectorsTest StaticVectorOfStaticVectorsTest.cpp ) +TARGET_COMPILE_OPTIONS( StaticVectorOfStaticVectorsTest PRIVATE ${CXX_TESTS_FLAGS} ) +TARGET_LINK_LIBRARIES( StaticVectorOfStaticVectorsTest ${GTEST_BOTH_LIBRARIES} ) ADD_TEST( ArrayTest ${EXECUTABLE_OUTPUT_PATH}/ArrayTest${CMAKE_EXECUTABLE_SUFFIX} ) @@ -93,7 +90,7 @@ ADD_TEST( VectorEvaluateAndReduceTest ${EXECUTABLE_OUTPUT_PATH}/VectorEvaluateAn ADD_TEST( VectorBinaryOperationsTest ${EXECUTABLE_OUTPUT_PATH}/VectorBinaryOperationsTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( VectorUnaryOperationsTest ${EXECUTABLE_OUTPUT_PATH}/VectorUnaryOperationsTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( VectorVerticalOperationsTest ${EXECUTABLE_OUTPUT_PATH}/VectorVerticalOperationsTest${CMAKE_EXECUTABLE_SUFFIX} ) -#ADD_TEST( VectorOfStaticVectorsTest ${EXECUTABLE_OUTPUT_PATH}/VectorOfStaticVectorsTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( VectorOfStaticVectorsTest ${EXECUTABLE_OUTPUT_PATH}/VectorOfStaticVectorsTest${CMAKE_EXECUTABLE_SUFFIX} ) IF( BUILD_CUDA ) ADD_TEST( ArrayTestCuda 
${EXECUTABLE_OUTPUT_PATH}/ArrayTestCuda${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( ArrayViewTestCuda ${EXECUTABLE_OUTPUT_PATH}/ArrayViewTestCuda${CMAKE_EXECUTABLE_SUFFIX} ) @@ -108,7 +105,7 @@ ENDIF() ADD_TEST( StaticArrayTest ${EXECUTABLE_OUTPUT_PATH}/StaticArrayTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( StaticVectorTest ${EXECUTABLE_OUTPUT_PATH}/StaticVectorTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( StaticVectorOperationsTest ${EXECUTABLE_OUTPUT_PATH}/StaticVectorOperationsTest${CMAKE_EXECUTABLE_SUFFIX} ) -#ADD_TEST( StaticVectorOfStaticVectorsTest ${EXECUTABLE_OUTPUT_PATH}/StaticVectorOfStaticVectorsTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( StaticVectorOfStaticVectorsTest ${EXECUTABLE_OUTPUT_PATH}/StaticVectorOfStaticVectorsTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_SUBDIRECTORY( Multimaps ) diff --git a/src/UnitTests/Containers/VectorBinaryOperationsTest.cpp b/src/UnitTests/Containers/VectorBinaryOperationsTest.cpp index 717f56e90..30b01f108 100644 --- a/src/UnitTests/Containers/VectorBinaryOperationsTest.cpp +++ b/src/UnitTests/Containers/VectorBinaryOperationsTest.cpp @@ -1 +1,2 @@ #include "VectorBinaryOperationsTest.h" +#include "../main.h" diff --git a/src/UnitTests/Containers/VectorBinaryOperationsTest.cu b/src/UnitTests/Containers/VectorBinaryOperationsTest.cu index 717f56e90..30b01f108 100644 --- a/src/UnitTests/Containers/VectorBinaryOperationsTest.cu +++ b/src/UnitTests/Containers/VectorBinaryOperationsTest.cu @@ -1 +1,2 @@ #include "VectorBinaryOperationsTest.h" +#include "../main.h" diff --git a/src/UnitTests/Containers/VectorBinaryOperationsTest.h b/src/UnitTests/Containers/VectorBinaryOperationsTest.h index 46833c235..818d45e32 100644 --- a/src/UnitTests/Containers/VectorBinaryOperationsTest.h +++ b/src/UnitTests/Containers/VectorBinaryOperationsTest.h @@ -627,8 +627,8 @@ TYPED_TEST( VectorBinaryOperationsTest, comparisonOnDifferentDevices ) { SETUP_BINARY_TEST_ALIASES; - using RightHostVector = typename TestFixture::RightVector::Self< typename 
TestFixture::RightVector::RealType, Devices::Sequential >; - using RightHost = typename TestFixture::Right::Self< typename TestFixture::Right::RealType, Devices::Sequential >; + using RightHostVector = typename TestFixture::RightVector::template Self< typename TestFixture::RightVector::RealType, Devices::Sequential >; + using RightHost = typename TestFixture::Right::template Self< typename TestFixture::Right::RealType, Devices::Sequential >; RightHostVector _R1_h; _R1_h = this->_R1; RightHost R1_h( _R1_h ); @@ -646,7 +646,3 @@ TYPED_TEST( VectorBinaryOperationsTest, comparisonOnDifferentDevices ) } // namespace binary_tests #endif // HAVE_GTEST - -#if !defined(DISTRIBUTED_VECTOR) && !defined(STATIC_VECTOR) -#include "../main.h" -#endif diff --git a/src/UnitTests/Containers/VectorHelperFunctions.h b/src/UnitTests/Containers/VectorHelperFunctions.h index 4e8c64fae..649de1cee 100644 --- a/src/UnitTests/Containers/VectorHelperFunctions.h +++ b/src/UnitTests/Containers/VectorHelperFunctions.h @@ -112,3 +112,35 @@ void bindOrAssign( V1& v1, V2& v2 ) { v1 = v2; } + + +#ifdef HAVE_GTEST +#include "gtest/gtest.h" + +template< typename T1, typename T2, + std::enable_if_t< ! TNL::HasSubscriptOperator< T1 >::value && + ! TNL::HasSubscriptOperator< T2 >::value, bool > = true > +void expect_near( const T1& arg, const T2& expected, double epsilon ) +{ + EXPECT_NEAR( arg, expected, epsilon ); +} + +template< typename T1, typename T2, + std::enable_if_t< TNL::HasSubscriptOperator< T1 >::value && + ! 
TNL::HasSubscriptOperator< T2 >::value, bool > = true > +void expect_near( const T1& arg, const T2& expected, double epsilon ) +{ + for( int i = 0; i < arg.getSize(); i++ ) + expect_near( arg[ i ], expected, epsilon ); +} + +template< typename T1, typename T2, + std::enable_if_t< TNL::HasSubscriptOperator< T1 >::value && + TNL::HasSubscriptOperator< T2 >::value, bool > = true > +void expect_near( const T1& arg, const T2& expected, double epsilon ) +{ + ASSERT_EQ( arg.getSize(), expected.getSize() ); + for( int i = 0; i < arg.getSize(); i++ ) + expect_near( arg[ i ], expected[ i ], epsilon ); +} +#endif diff --git a/src/UnitTests/Containers/VectorUnaryOperationsTest.cpp b/src/UnitTests/Containers/VectorUnaryOperationsTest.cpp index 1b9bcdc6c..1638e7ce7 100644 --- a/src/UnitTests/Containers/VectorUnaryOperationsTest.cpp +++ b/src/UnitTests/Containers/VectorUnaryOperationsTest.cpp @@ -1 +1,2 @@ #include "VectorUnaryOperationsTest.h" +#include "../main.h" diff --git a/src/UnitTests/Containers/VectorUnaryOperationsTest.cu b/src/UnitTests/Containers/VectorUnaryOperationsTest.cu index 1b9bcdc6c..1638e7ce7 100644 --- a/src/UnitTests/Containers/VectorUnaryOperationsTest.cu +++ b/src/UnitTests/Containers/VectorUnaryOperationsTest.cu @@ -1 +1,2 @@ #include "VectorUnaryOperationsTest.h" +#include "../main.h" diff --git a/src/UnitTests/Containers/VectorUnaryOperationsTest.h b/src/UnitTests/Containers/VectorUnaryOperationsTest.h index 9baea01c5..a5beb58d9 100644 --- a/src/UnitTests/Containers/VectorUnaryOperationsTest.h +++ b/src/UnitTests/Containers/VectorUnaryOperationsTest.h @@ -139,6 +139,10 @@ protected: TYPED_TEST_SUITE( VectorUnaryOperationsTest, VectorTypes ); + +#define EXPECTED_VECTOR( TestFixture, function ) \ + using ExpectedVector = typename TestFixture::template Vector< Expressions::RemoveET< decltype(function(typename VectorOrView::RealType{})) > >; + #ifdef STATIC_VECTOR #define SETUP_UNARY_VECTOR_TEST( _ ) \ using VectorOrView = typename 
TestFixture::VectorOrView; \ @@ -151,8 +155,8 @@ TYPED_TEST_SUITE( VectorUnaryOperationsTest, VectorTypes ); #define SETUP_UNARY_VECTOR_TEST_FUNCTION( _, begin, end, function ) \ using VectorOrView = typename TestFixture::VectorOrView; \ using RealType = typename VectorOrView::RealType; \ - using ExpectedVector = typename TestFixture::template Vector< decltype(function(RealType{})) >; \ - constexpr int _size = VectorOrView::getSize(); \ + EXPECTED_VECTOR( TestFixture, function ); \ + constexpr int _size = VectorOrView::getSize(); \ \ VectorOrView V1; \ ExpectedVector expected; \ @@ -187,9 +191,9 @@ TYPED_TEST_SUITE( VectorUnaryOperationsTest, VectorTypes ); using VectorType = typename TestFixture::VectorType; \ using VectorOrView = typename TestFixture::VectorOrView; \ using RealType = typename VectorType::RealType; \ - using ExpectedVector = typename TestFixture::template Vector< decltype(function(RealType{})) >; \ + EXPECTED_VECTOR( TestFixture, function ); \ using HostVector = typename VectorType::template Self< RealType, Devices::Host >; \ - using HostExpectedVector = typename ExpectedVector::template Self< decltype(function(RealType{})), Devices::Host >; \ + using HostExpectedVector = typename ExpectedVector::template Self< typename ExpectedVector::RealType, Devices::Host >; \ using CommunicatorType = typename VectorOrView::CommunicatorType; \ const auto group = CommunicatorType::AllGroup; \ using LocalRangeType = typename VectorOrView::LocalRangeType; \ @@ -228,9 +232,9 @@ TYPED_TEST_SUITE( VectorUnaryOperationsTest, VectorTypes ); using VectorType = typename TestFixture::VectorType; \ using VectorOrView = typename TestFixture::VectorOrView; \ using RealType = typename VectorType::RealType; \ - using ExpectedVector = typename TestFixture::template Vector< decltype(function(RealType{})) >; \ + EXPECTED_VECTOR( TestFixture, function ); \ using HostVector = typename VectorType::template Self< RealType, Devices::Host >; \ - using HostExpectedVector = typename 
ExpectedVector::template Self< decltype(function(RealType{})), Devices::Host >; \ + using HostExpectedVector = typename ExpectedVector::template Self< typename ExpectedVector::RealType, Devices::Host >; \ \ HostVector _V1h( size ); \ HostExpectedVector expected_h( size ); \ @@ -261,10 +265,10 @@ void expect_vectors_near( const Left& _v1, const Right& _v2 ) ASSERT_EQ( _v1.getSize(), _v2.getSize() ); #ifdef STATIC_VECTOR for( int i = 0; i < _v1.getSize(); i++ ) - EXPECT_NEAR( _v1[i], _v2[i], 1e-6 ) << "i = " << i; + expect_near( _v1[i], _v2[i], 1e-6 ); #else - using LeftNonConstReal = std::remove_const_t< typename Left::RealType >; - using RightNonConstReal = std::remove_const_t< typename Right::RealType >; + using LeftNonConstReal = Expressions::RemoveET< std::remove_const_t< typename Left::RealType > >; + using RightNonConstReal = Expressions::RemoveET< std::remove_const_t< typename Right::RealType > >; #ifdef DISTRIBUTED_VECTOR using CommunicatorType = typename Left::CommunicatorType; static_assert( std::is_same< typename Right::CommunicatorType, CommunicatorType >::value, @@ -290,7 +294,7 @@ void expect_vectors_near( const Left& _v1, const Right& _v2 ) #else for( int i = 0; i < v1.getSize(); i++ ) #endif - EXPECT_NEAR( v1_h[i], v2_h[i], 1e-6 ) << "i = " << i; + expect_near( v1_h[i], v2_h[i], 1e-6 ); #endif } @@ -465,8 +469,8 @@ TYPED_TEST( VectorUnaryOperationsTest, atanh ) TYPED_TEST( VectorUnaryOperationsTest, pow ) { // FIXME: for integer exponent, the test fails with CUDA -// auto pow3 = [](double i) { return TNL::pow(i, 3); }; - auto pow3 = [](double i) { return TNL::pow(i, 3.0); }; +// auto pow3 = [](auto i) { return TNL::pow(i, 3); }; + auto pow3 = [](auto i) { return TNL::pow(i, 3.0); }; SETUP_UNARY_VECTOR_TEST_FUNCTION( VECTOR_TEST_SIZE, -VECTOR_TEST_SIZE, VECTOR_TEST_SIZE, pow3 ); // vector or view @@ -585,9 +589,11 @@ TYPED_TEST( VectorUnaryOperationsTest, sign ) expect_vectors_near( sign(-(-V1)), expected ); } +// This test is not suitable for 
vector-of-static-vectors where the RealType cannot be cast to bool. +#ifndef VECTOR_OF_STATIC_VECTORS TYPED_TEST( VectorUnaryOperationsTest, cast ) { - auto identity = [](int i) { return i; }; + auto identity = [](auto i) { return i; }; SETUP_UNARY_VECTOR_TEST_FUNCTION( VECTOR_TEST_SIZE, 1, VECTOR_TEST_SIZE, identity ); // vector or vector view @@ -612,11 +618,8 @@ TYPED_TEST( VectorUnaryOperationsTest, cast ) // EXPECT_EQ( expression3, true ); EXPECT_EQ( cast(-V1), true ); } +#endif } // namespace unary_tests #endif // HAVE_GTEST - -#if !defined(DISTRIBUTED_VECTOR) && !defined(STATIC_VECTOR) -#include "../main.h" -#endif diff --git a/src/UnitTests/Containers/VectorVerticalOperationsTest.cpp b/src/UnitTests/Containers/VectorVerticalOperationsTest.cpp index 376d6a0b7..ee386fab1 100644 --- a/src/UnitTests/Containers/VectorVerticalOperationsTest.cpp +++ b/src/UnitTests/Containers/VectorVerticalOperationsTest.cpp @@ -1 +1,2 @@ #include "VectorVerticalOperationsTest.h" +#include "../main.h" diff --git a/src/UnitTests/Containers/VectorVerticalOperationsTest.cu b/src/UnitTests/Containers/VectorVerticalOperationsTest.cu index 376d6a0b7..ee386fab1 100644 --- a/src/UnitTests/Containers/VectorVerticalOperationsTest.cu +++ b/src/UnitTests/Containers/VectorVerticalOperationsTest.cu @@ -1 +1,2 @@ #include "VectorVerticalOperationsTest.h" +#include "../main.h" diff --git a/src/UnitTests/Containers/VectorVerticalOperationsTest.h b/src/UnitTests/Containers/VectorVerticalOperationsTest.h index dae1dc198..3aa60e612 100644 --- a/src/UnitTests/Containers/VectorVerticalOperationsTest.h +++ b/src/UnitTests/Containers/VectorVerticalOperationsTest.h @@ -175,6 +175,8 @@ protected: TYPED_TEST_SUITE( VectorVerticalOperationsTest, VectorTypes ); +// FIXME: function does not work for nested vectors - std::numeric_limits does not make sense for vector types +#ifndef VECTOR_OF_STATIC_VECTORS TYPED_TEST( VectorVerticalOperationsTest, max ) { SETUP_VERTICAL_TEST_ALIASES; @@ -186,7 +188,10 @@ 
TYPED_TEST( VectorVerticalOperationsTest, max ) // binary expression EXPECT_EQ( max(V1 + 2), size - 1 + 2 ); } +#endif +// FIXME: function does not work for nested vectors - the reduction operation expects a scalar type +#ifndef VECTOR_OF_STATIC_VECTORS TYPED_TEST( VectorVerticalOperationsTest, argMax ) { SETUP_VERTICAL_TEST_ALIASES; @@ -199,7 +204,10 @@ TYPED_TEST( VectorVerticalOperationsTest, argMax ) // expression EXPECT_EQ( argMax(V1 + 2), std::make_pair( (RealType) size - 1 + 2, size - 1 ) ); } +#endif +// FIXME: function does not work for nested vectors - std::numeric_limits does not make sense for vector types +#ifndef VECTOR_OF_STATIC_VECTORS TYPED_TEST( VectorVerticalOperationsTest, min ) { SETUP_VERTICAL_TEST_ALIASES; @@ -211,7 +219,10 @@ TYPED_TEST( VectorVerticalOperationsTest, min ) // binary expression EXPECT_EQ( min(V1 + 2), 2 ); } +#endif +// FIXME: function does not work for nested vectors - the reduction operation expects a scalar type +#ifndef VECTOR_OF_STATIC_VECTORS TYPED_TEST( VectorVerticalOperationsTest, argMin ) { SETUP_VERTICAL_TEST_ALIASES; @@ -224,6 +235,7 @@ TYPED_TEST( VectorVerticalOperationsTest, argMin ) // binary expression EXPECT_EQ( argMin(V1 + 2), std::make_pair( (RealType) 2 , 0 ) ); } +#endif TYPED_TEST( VectorVerticalOperationsTest, sum ) { @@ -237,6 +249,8 @@ TYPED_TEST( VectorVerticalOperationsTest, sum ) EXPECT_EQ( sum(V1 - 1), 0.5 * size * (size - 1) - size ); } +// FIXME: function does not work for nested vectors - max does not work for nested vectors +#ifndef VECTOR_OF_STATIC_VECTORS TYPED_TEST( VectorVerticalOperationsTest, maxNorm ) { SETUP_VERTICAL_TEST_ALIASES; @@ -248,6 +262,7 @@ TYPED_TEST( VectorVerticalOperationsTest, maxNorm ) // binary expression EXPECT_EQ( maxNorm(V1 - size), size ); } +#endif TYPED_TEST( VectorVerticalOperationsTest, l1Norm ) { @@ -269,9 +284,11 @@ TYPED_TEST( VectorVerticalOperationsTest, l1Norm ) EXPECT_EQ( l1Norm(2 * V1 - V1), size ); } +// FIXME: l2Norm does not work for nested vectors 
- dangling references due to Static*ExpressionTemplate +// classes binding to temporary objects which get destroyed before l2Norm returns +#ifndef VECTOR_OF_STATIC_VECTORS TYPED_TEST( VectorVerticalOperationsTest, l2Norm ) { - using RealType = typename TestFixture::VectorOrView::RealType; #ifdef STATIC_VECTOR setConstantSequence( this->V1, 1 ); const typename TestFixture::VectorOrView& V1( this->V1 ); @@ -282,7 +299,7 @@ TYPED_TEST( VectorVerticalOperationsTest, l2Norm ) #endif const int size = V1.getSize(); - const auto expected = std::sqrt( (RealType) size ); + const auto expected = std::sqrt( size ); // vector or vector view EXPECT_EQ( l2Norm(V1), expected ); @@ -291,10 +308,12 @@ TYPED_TEST( VectorVerticalOperationsTest, l2Norm ) // binary expression EXPECT_EQ( l2Norm(2 * V1 - V1), expected ); } +#endif +// FIXME function does not work for nested vectors - compilation error +#ifndef VECTOR_OF_STATIC_VECTORS TYPED_TEST( VectorVerticalOperationsTest, lpNorm ) { - using RealType = typename TestFixture::VectorOrView::RealType; #ifdef STATIC_VECTOR setConstantSequence( this->V1, 1 ); const typename TestFixture::VectorOrView& V1( this->V1 ); @@ -306,24 +325,25 @@ TYPED_TEST( VectorVerticalOperationsTest, lpNorm ) const int size = V1.getSize(); const auto expectedL1norm = size; - const auto expectedL2norm = std::sqrt( (RealType) size ); - const auto expectedL3norm = std::cbrt( (RealType) size ); + const auto expectedL2norm = std::sqrt( size ); + const auto expectedL3norm = std::cbrt( size ); const auto epsilon = 64 * std::numeric_limits< decltype(expectedL3norm) >::epsilon(); // vector or vector view EXPECT_EQ( lpNorm(V1, 1.0), expectedL1norm ); EXPECT_EQ( lpNorm(V1, 2.0), expectedL2norm ); - EXPECT_NEAR( lpNorm(V1, 3.0), expectedL3norm, epsilon ); + expect_near( lpNorm(V1, 3.0), expectedL3norm, epsilon ); // unary expression EXPECT_EQ( lpNorm(-V1, 1.0), expectedL1norm ); EXPECT_EQ( lpNorm(-V1, 2.0), expectedL2norm ); - EXPECT_NEAR( lpNorm(-V1, 3.0), expectedL3norm, 
epsilon ); + expect_near( lpNorm(-V1, 3.0), expectedL3norm, epsilon ); // binary expression EXPECT_EQ( lpNorm(2 * V1 - V1, 1.0), expectedL1norm ); EXPECT_EQ( lpNorm(2 * V1 - V1, 2.0), expectedL2norm ); - EXPECT_NEAR( lpNorm(2 * V1 - V1, 3.0), expectedL3norm, epsilon ); + expect_near( lpNorm(2 * V1 - V1, 3.0), expectedL3norm, epsilon ); } +#endif TYPED_TEST( VectorVerticalOperationsTest, product ) { @@ -353,7 +373,3 @@ TYPED_TEST( VectorVerticalOperationsTest, product ) } // namespace vertical_tests #endif // HAVE_GTEST - -#if !defined(DISTRIBUTED_VECTOR) && !defined(STATIC_VECTOR) -#include "../main.h" -#endif -- GitLab From 4c5ddb352c9aafc93c4d3fa33f72d7aefaaa72a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Thu, 9 Jul 2020 15:21:27 +0200 Subject: [PATCH 09/15] Fixed vector assignment for nested vectors --- src/TNL/Containers/detail/VectorAssignment.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/TNL/Containers/detail/VectorAssignment.h b/src/TNL/Containers/detail/VectorAssignment.h index fa778a248..5f273d5f0 100644 --- a/src/TNL/Containers/detail/VectorAssignment.h +++ b/src/TNL/Containers/detail/VectorAssignment.h @@ -11,6 +11,7 @@ #pragma once #include +#include #include namespace TNL { @@ -22,7 +23,7 @@ namespace detail { */ template< typename Vector, typename T, - bool hasSubscriptOperator = HasSubscriptOperator< T >::value > + bool vectorVectorAssignment = HasSubscriptOperator< T >::value && ! Expressions::IsArithmeticSubtype< T, Vector >::value > struct VectorAssignment; /** @@ -30,12 +31,12 @@ struct VectorAssignment; */ template< typename Vector, typename T, - bool hasSubscriptOperator = HasSubscriptOperator< T >::value, + bool vectorVectorAssignment = HasSubscriptOperator< T >::value && ! 
Expressions::IsArithmeticSubtype< T, Vector >::value, bool hasSetSizeMethod = HasSetSizeMethod< T >::value > struct VectorAssignmentWithOperation; /** - * \brief Specialization of ASSIGNEMENT with subscript operator + * \brief Specialization for vector-vector assignment. */ template< typename Vector, typename T > @@ -73,8 +74,7 @@ struct VectorAssignment< Vector, T, true > }; /** - * \brief Specialization of ASSIGNEMENT for array-value assignment for other types. We assume - * that T is convertible to Vector::ValueType. + * \brief Specialization for vector-value assignment. We assume that T is assignable to Vector::RealType. */ template< typename Vector, typename T > -- GitLab From 3467033919343fa64d877a8823c9d4747ad28366 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Thu, 9 Jul 2020 15:25:51 +0200 Subject: [PATCH 10/15] Fixed binary expression templates for nested vector types Fixes #60 --- src/TNL/Containers/Expressions/TypeTraits.h | 82 ++++++++++++----- .../Containers/VectorBinaryOperationsTest.h | 87 +++++++++++++++++-- .../Containers/VectorOfStaticVectorsTest.cpp | 6 +- .../Containers/VectorOfStaticVectorsTest.cu | 6 +- .../Containers/VectorOfStaticVectorsTest.h | 5 ++ 5 files changed, 148 insertions(+), 38 deletions(-) create mode 100644 src/UnitTests/Containers/VectorOfStaticVectorsTest.h diff --git a/src/TNL/Containers/Expressions/TypeTraits.h b/src/TNL/Containers/Expressions/TypeTraits.h index 5df97785e..3142ee35e 100644 --- a/src/TNL/Containers/Expressions/TypeTraits.h +++ b/src/TNL/Containers/Expressions/TypeTraits.h @@ -85,13 +85,70 @@ using EnableIfDistributedBinaryExpression_t = std::enable_if_t< ) >; +// helper trait class for recursively turning expression template classes into compatible vectors +template<class T, class R = void> +struct enable_if_type { typedef R type; }; + +template< typename R, typename Enable = void > +struct RemoveExpressionTemplate +{ + using type = std::decay_t< R >; +}; + +template< typename R > +struct
RemoveExpressionTemplate< R, typename enable_if_type< typename std::decay_t< R >::VectorOperandType >::type > +{ + using type = typename RemoveExpressionTemplate< typename std::decay_t< R >::VectorOperandType >::type; +}; + +template< typename R > +using RemoveET = typename RemoveExpressionTemplate< R >::type; + + +template< typename T1, typename T2 > +constexpr std::enable_if_t< + ! ( std::is_arithmetic< T1 >::value && std::is_arithmetic< T2 >::value ) && + ! ( IsStaticArrayType< T1 >::value && IsStaticArrayType< T2 >::value ) && + ! ( IsArrayType< T1 >::value && IsArrayType< T2 >::value ) +, bool > +compatibleForVectorAssignment() +{ + return false; +} + +template< typename T1, typename T2 > +constexpr std::enable_if_t< std::is_arithmetic< T1 >::value && std::is_arithmetic< T2 >::value, bool > +compatibleForVectorAssignment() +{ + return true; +} + +template< typename T1, typename T2 > +constexpr std::enable_if_t< IsStaticArrayType< T1 >::value && IsStaticArrayType< T2 >::value, bool > +compatibleForVectorAssignment() +{ + return T1::getSize() == T2::getSize() && + compatibleForVectorAssignment< typename RemoveET< T1 >::ValueType, typename RemoveET< T2 >::ValueType >(); +} + +template< typename T1, typename T2 > +constexpr std::enable_if_t< IsArrayType< T1 >::value && IsArrayType< T2 >::value, bool > +compatibleForVectorAssignment() +{ + return compatibleForVectorAssignment< typename RemoveET< T1 >::ValueType, typename RemoveET< T2 >::ValueType >(); +} + + // helper trait class for proper classification of expression operands using getExpressionVariableType template< typename T, typename V, - bool enabled = IsVectorType< V >::value > + bool enabled = HasEnabledExpressionTemplates< V >::value || + HasEnabledStaticExpressionTemplates< V >::value || + HasEnabledDistributedExpressionTemplates< V >::value > struct IsArithmeticSubtype : public std::integral_constant< bool, - // TODO: use std::is_assignable? 
- std::is_same< T, typename std::decay_t< V >::RealType >::value > + // Note that using std::is_same would not be general enough, because e.g. + // StaticVector<3, int> may be assigned to StaticVector<3, double> + compatibleForVectorAssignment< typename V::RealType, T >() > {}; template< typename T > @@ -110,25 +167,6 @@ struct IsArithmeticSubtype< T, V, false > {}; -// helper trait class (used in unit tests) -template -struct enable_if_type { typedef R type; }; - -template< typename R, typename Enable = void > -struct RemoveExpressionTemplate -{ - using type = std::decay_t< R >; -}; - -template< typename R > -struct RemoveExpressionTemplate< R, typename enable_if_type< typename std::decay_t< R >::VectorOperandType >::type > -{ - using type = typename RemoveExpressionTemplate< typename std::decay_t< R >::VectorOperandType >::type; -}; - -template< typename R > -using RemoveET = typename RemoveExpressionTemplate< R >::type; - // helper trait class for Static*ExpressionTemplates classes template< typename R, typename Enable = void > struct OperandMemberType diff --git a/src/UnitTests/Containers/VectorBinaryOperationsTest.h b/src/UnitTests/Containers/VectorBinaryOperationsTest.h index 818d45e32..27c4c46e9 100644 --- a/src/UnitTests/Containers/VectorBinaryOperationsTest.h +++ b/src/UnitTests/Containers/VectorBinaryOperationsTest.h @@ -58,18 +58,18 @@ class VectorBinaryOperationsTest : public ::testing::Test protected: using Left = typename Pair::Left; using Right = typename Pair::Right; + using LeftReal = std::remove_const_t< typename Left::RealType >; + using RightReal = std::remove_const_t< typename Right::RealType >; #ifndef STATIC_VECTOR - using LeftNonConstReal = std::remove_const_t< typename Left::RealType >; - using RightNonConstReal = std::remove_const_t< typename Right::RealType >; #ifdef DISTRIBUTED_VECTOR using CommunicatorType = typename Left::CommunicatorType; static_assert( std::is_same< typename Right::CommunicatorType, CommunicatorType >::value, 
"CommunicatorType must be the same for both Left and Right vectors." ); - using LeftVector = DistributedVector< LeftNonConstReal, typename Left::DeviceType, typename Left::IndexType, CommunicatorType >; - using RightVector = DistributedVector< RightNonConstReal, typename Right::DeviceType, typename Right::IndexType, CommunicatorType >; + using LeftVector = DistributedVector< LeftReal, typename Left::DeviceType, typename Left::IndexType, CommunicatorType >; + using RightVector = DistributedVector< RightReal, typename Right::DeviceType, typename Right::IndexType, CommunicatorType >; #else - using LeftVector = Vector< LeftNonConstReal, typename Left::DeviceType, typename Left::IndexType >; - using RightVector = Vector< RightNonConstReal, typename Right::DeviceType, typename Right::IndexType >; + using LeftVector = Vector< LeftReal, typename Left::DeviceType, typename Left::IndexType >; + using RightVector = Vector< RightReal, typename Right::DeviceType, typename Right::IndexType >; #endif #endif @@ -132,6 +132,8 @@ protected: #define SETUP_BINARY_TEST_ALIASES \ using Left = typename TestFixture::Left; \ using Right = typename TestFixture::Right; \ + using LeftReal = typename TestFixture::LeftReal; \ + using RightReal = typename TestFixture::RightReal; \ Left& L1 = this->L1; \ Left& L2 = this->L2; \ Right& R1 = this->R1; \ @@ -263,6 +265,8 @@ TYPED_TEST( VectorBinaryOperationsTest, EQ ) EXPECT_EQ( L1, R1 ); // vector or vector view EXPECT_EQ( L1, 1 ); // right scalar EXPECT_EQ( 1, R1 ); // left scalar + EXPECT_EQ( L1, RightReal(1) ); // right scalar + EXPECT_EQ( LeftReal(1), R1 ); // left scalar EXPECT_EQ( L2, R1 + R1 ); // right expression EXPECT_EQ( L1 + L1, R2 ); // left expression EXPECT_EQ( L1 + L1, R1 + R1 ); // two expressions @@ -282,6 +286,8 @@ TYPED_TEST( VectorBinaryOperationsTest, NE ) EXPECT_NE( L1, R2 ); // vector or vector view EXPECT_NE( L1, 2 ); // right scalar EXPECT_NE( 2, R1 ); // left scalar + EXPECT_NE( L1, RightReal(2) ); // right scalar + 
EXPECT_NE( LeftReal(2), R1 ); // left scalar EXPECT_NE( L1, R1 + R1 ); // right expression EXPECT_NE( L1 + L1, R1 ); // left expression EXPECT_NE( L1 + L1, R2 + R2 ); // two expressions @@ -301,6 +307,8 @@ TYPED_TEST( VectorBinaryOperationsTest, LT ) EXPECT_LT( L1, R2 ); // vector or vector view EXPECT_LT( L1, 2 ); // right scalar EXPECT_LT( 1, R2 ); // left scalar + EXPECT_LT( L1, RightReal(2) ); // right scalar + EXPECT_LT( LeftReal(1), R2 ); // left scalar EXPECT_LT( L1, R1 + R1 ); // right expression EXPECT_LT( L1 - L1, R1 ); // left expression EXPECT_LT( L1 - L1, R1 + R1 ); // two expressions @@ -313,6 +321,8 @@ TYPED_TEST( VectorBinaryOperationsTest, GT ) EXPECT_GT( L2, R1 ); // vector or vector view EXPECT_GT( L2, 1 ); // right scalar EXPECT_GT( 2, R1 ); // left scalar + EXPECT_GT( L2, RightReal(1) ); // right scalar + EXPECT_GT( LeftReal(2), R1 ); // left scalar EXPECT_GT( L1, R1 - R1 ); // right expression EXPECT_GT( L1 + L1, R1 ); // left expression EXPECT_GT( L1 + L1, R1 - R1 ); // two expressions @@ -326,6 +336,8 @@ TYPED_TEST( VectorBinaryOperationsTest, LE ) EXPECT_LE( L1, R2 ); // vector or vector view EXPECT_LE( L1, 2 ); // right scalar EXPECT_LE( 1, R2 ); // left scalar + EXPECT_LE( L1, RightReal(2) ); // right scalar + EXPECT_LE( LeftReal(1), R2 ); // left scalar EXPECT_LE( L1, R1 + R1 ); // right expression EXPECT_LE( L1 - L1, R1 ); // left expression EXPECT_LE( L1 - L1, R1 + R1 ); // two expressions @@ -334,6 +346,8 @@ TYPED_TEST( VectorBinaryOperationsTest, LE ) EXPECT_LE( L1, R1 ); // vector or vector view EXPECT_LE( L1, 1 ); // right scalar EXPECT_LE( 1, R1 ); // left scalar + EXPECT_LE( L1, RightReal(1) ); // right scalar + EXPECT_LE( LeftReal(1), R1 ); // left scalar EXPECT_LE( L2, R1 + R1 ); // right expression EXPECT_LE( L1 + L1, R2 ); // left expression EXPECT_LE( L1 + L1, R1 + R2 ); // two expressions @@ -347,6 +361,8 @@ TYPED_TEST( VectorBinaryOperationsTest, GE ) EXPECT_GE( L2, R1 ); // vector or vector view EXPECT_GE( L2, 1 ); // 
right scalar EXPECT_GE( 2, R1 ); // left scalar + EXPECT_GE( L2, RightReal(1) ); // right scalar + EXPECT_GE( LeftReal(2), R1 ); // left scalar EXPECT_GE( L1, R1 - R1 ); // right expression EXPECT_GE( L1 + L1, R1 ); // left expression EXPECT_GE( L1 + L1, R1 - R1 ); // two expressions @@ -355,6 +371,8 @@ TYPED_TEST( VectorBinaryOperationsTest, GE ) EXPECT_LE( L1, R1 ); // vector or vector view EXPECT_LE( L1, 1 ); // right scalar EXPECT_LE( 1, R1 ); // left scalar + EXPECT_LE( L1, RightReal(1) ); // right scalar + EXPECT_LE( LeftReal(1), R1 ); // left scalar EXPECT_LE( L2, R1 + R1 ); // right expression EXPECT_LE( L1 + L1, R2 ); // left expression EXPECT_LE( L1 + L1, R1 + R2 ); // two expressions @@ -369,6 +387,8 @@ TYPED_TEST( VectorBinaryOperationsTest, addition ) // with scalar EXPECT_EQ( L1 + 1, 2 ); EXPECT_EQ( 1 + L1, 2 ); + EXPECT_EQ( L1 + LeftReal(1), 2 ); + EXPECT_EQ( LeftReal(1) + L1, 2 ); // with expression EXPECT_EQ( L1 + (L1 + L1), 3 ); EXPECT_EQ( (L1 + L1) + L1, 3 ); @@ -376,6 +396,11 @@ TYPED_TEST( VectorBinaryOperationsTest, addition ) EXPECT_EQ( (L1 + L1) + R1, 3 ); // with two expressions EXPECT_EQ( (L1 + L1) + (L1 + L1), 4 ); + // with expression and scalar + EXPECT_EQ( (L1 + L1) + 1, 3 ); + EXPECT_EQ( (L1 + L1) + RightReal(1), 3 ); + EXPECT_EQ( 1 + (R1 + R1), 3 ); + EXPECT_EQ( LeftReal(1) + (R1 + R1), 3 ); } TYPED_TEST( VectorBinaryOperationsTest, subtraction ) @@ -387,6 +412,8 @@ TYPED_TEST( VectorBinaryOperationsTest, subtraction ) // with scalar EXPECT_EQ( L1 - 1, 0 ); EXPECT_EQ( 1 - L1, 0 ); + EXPECT_EQ( L1 - LeftReal(1), 0 ); + EXPECT_EQ( LeftReal(1) - L1, 0 ); // with expression EXPECT_EQ( L2 - (L1 + L1), 0 ); EXPECT_EQ( (L1 + L1) - L2, 0 ); @@ -394,6 +421,11 @@ TYPED_TEST( VectorBinaryOperationsTest, subtraction ) EXPECT_EQ( (L1 + L1) - R2, 0 ); // with two expressions EXPECT_EQ( (L1 + L1) - (L1 + L1), 0 ); + // with expression and scalar + EXPECT_EQ( (L1 + L1) - 1, 1 ); + EXPECT_EQ( (L1 + L1) - RightReal(1), 1 ); + EXPECT_EQ( 1 - (R1 + R1), 
-1 ); + EXPECT_EQ( LeftReal(1) - (R1 + R1), -1 ); } TYPED_TEST( VectorBinaryOperationsTest, multiplication ) @@ -405,6 +437,8 @@ TYPED_TEST( VectorBinaryOperationsTest, multiplication ) // with scalar EXPECT_EQ( L1 * 2, L2 ); EXPECT_EQ( 2 * L1, L2 ); + EXPECT_EQ( L1 * LeftReal(2), L2 ); + EXPECT_EQ( LeftReal(2) * L1, L2 ); // with expression EXPECT_EQ( L1 * (L1 + L1), L2 ); EXPECT_EQ( (L1 + L1) * L1, L2 ); @@ -412,6 +446,11 @@ TYPED_TEST( VectorBinaryOperationsTest, multiplication ) EXPECT_EQ( (L1 + L1) * R1, L2 ); // with two expressions EXPECT_EQ( (L1 + L1) * (L1 + L1), 4 ); + // with expression and scalar + EXPECT_EQ( (L1 + L1) * 1, 2 ); + EXPECT_EQ( (L1 + L1) * RightReal(1), 2 ); + EXPECT_EQ( 1 * (R1 + R1), 2 ); + EXPECT_EQ( LeftReal(1) * (R1 + R1), 2 ); } TYPED_TEST( VectorBinaryOperationsTest, division ) @@ -423,6 +462,8 @@ TYPED_TEST( VectorBinaryOperationsTest, division ) // with scalar EXPECT_EQ( L2 / 2, L1 ); EXPECT_EQ( 2 / L2, L1 ); + EXPECT_EQ( L2 / LeftReal(2), L1 ); + EXPECT_EQ( LeftReal(2) / L2, L1 ); // with expression EXPECT_EQ( L2 / (L1 + L1), L1 ); EXPECT_EQ( (L1 + L1) / L2, L1 ); @@ -430,6 +471,11 @@ TYPED_TEST( VectorBinaryOperationsTest, division ) EXPECT_EQ( (L1 + L1) / R2, L1 ); // with two expressions EXPECT_EQ( (L1 + L1) / (L1 + L1), L1 ); + // with expression and scalar + EXPECT_EQ( (L1 + L1) / 1, 2 ); + EXPECT_EQ( (L1 + L1) / RightReal(1), 2 ); + EXPECT_EQ( 2 / (R1 + R1), 1 ); + EXPECT_EQ( LeftReal(2) / (R1 + R1), 1 ); } template< typename Left, typename Right, std::enable_if_t< std::is_const::value, bool > = true > @@ -438,12 +484,15 @@ void test_assignment( Left& L1, Left& L2, Right& R1, Right& R2 ) template< typename Left, typename Right, std::enable_if_t< ! 
std::is_const::value, bool > = true > void test_assignment( Left& L1, Left& L2, Right& R1, Right& R2 ) { + using RightReal = std::remove_const_t< typename Right::RealType >; // with vector or vector view L1 = R2; EXPECT_EQ( L1, R2 ); // with scalar L1 = 1; EXPECT_EQ( L1, 1 ); + L1 = RightReal(1); + EXPECT_EQ( L1, 1 ); // with expression L1 = R1 + R1; EXPECT_EQ( L1, R1 + R1 ); @@ -460,6 +509,7 @@ void test_add_assignment( Left& L1, Left& L2, Right& R1, Right& R2 ) template< typename Left, typename Right, std::enable_if_t< ! std::is_const::value, bool > = true > void test_add_assignment( Left& L1, Left& L2, Right& R1, Right& R2 ) { + using RightReal = std::remove_const_t< typename Right::RealType >; // with vector or vector view L1 += R2; EXPECT_EQ( L1, R1 + R2 ); @@ -467,6 +517,9 @@ void test_add_assignment( Left& L1, Left& L2, Right& R1, Right& R2 ) L1 = 1; L1 += 2; EXPECT_EQ( L1, 3 ); + L1 = 1; + L1 += RightReal(2); + EXPECT_EQ( L1, 3 ); // with expression L1 = 1; L1 += R1 + R1; @@ -484,6 +537,7 @@ void test_subtract_assignment( Left& L1, Left& L2, Right& R1, Right& R2 ) template< typename Left, typename Right, std::enable_if_t< ! std::is_const::value, bool > = true > void test_subtract_assignment( Left& L1, Left& L2, Right& R1, Right& R2 ) { + using RightReal = std::remove_const_t< typename Right::RealType >; // with vector or vector view L1 -= R2; EXPECT_EQ( L1, R1 - R2 ); @@ -491,6 +545,9 @@ void test_subtract_assignment( Left& L1, Left& L2, Right& R1, Right& R2 ) L1 = 1; L1 -= 2; EXPECT_EQ( L1, -1 ); + L1 = 1; + L1 -= RightReal(2); + EXPECT_EQ( L1, -1 ); // with expression L1 = 1; L1 -= R1 + R1; @@ -508,6 +565,7 @@ void test_multiply_assignment( Left& L1, Left& L2, Right& R1, Right& R2 ) template< typename Left, typename Right, std::enable_if_t< ! 
std::is_const::value, bool > = true > void test_multiply_assignment( Left& L1, Left& L2, Right& R1, Right& R2 ) { + using RightReal = std::remove_const_t< typename Right::RealType >; // with vector or vector view L1 *= R2; EXPECT_EQ( L1, R2 ); @@ -515,6 +573,9 @@ void test_multiply_assignment( Left& L1, Left& L2, Right& R1, Right& R2 ) L1 = 1; L1 *= 2; EXPECT_EQ( L1, 2 ); + L1 = 1; + L1 *= RightReal(2); + EXPECT_EQ( L1, 2 ); // with expression L1 = 1; L1 *= R1 + R1; @@ -532,6 +593,7 @@ void test_divide_assignment( Left& L1, Left& L2, Right& R1, Right& R2 ) template< typename Left, typename Right, std::enable_if_t< ! std::is_const::value, bool > = true > void test_divide_assignment( Left& L1, Left& L2, Right& R1, Right& R2 ) { + using RightReal = std::remove_const_t< typename Right::RealType >; // with vector or vector view L2 /= R2; EXPECT_EQ( L1, R1 ); @@ -539,6 +601,9 @@ void test_divide_assignment( Left& L1, Left& L2, Right& R1, Right& R2 ) L2 = 2; L2 /= 2; EXPECT_EQ( L1, 1 ); + L1 = 2; + L1 /= RightReal(2); + EXPECT_EQ( L1, 1 ); // with expression L2 = 2; L2 /= R1 + R1; @@ -602,6 +667,11 @@ TYPED_TEST( VectorBinaryOperationsTest, min ) EXPECT_EQ( TNL::min(L1 + L1, R1), R1 ); // with two expressions EXPECT_EQ( TNL::min(L1 + L1, R1 + R2), L2 ); + // with expression and scalar + EXPECT_EQ( TNL::min(L1 + L1, 1), L1 ); + EXPECT_EQ( TNL::min(L1 + L1, RightReal(1)), L1 ); + EXPECT_EQ( TNL::min(1, R1 + R1), L1 ); + EXPECT_EQ( TNL::min(LeftReal(1), R1 + R1), L1 ); } TYPED_TEST( VectorBinaryOperationsTest, max ) @@ -620,6 +690,11 @@ TYPED_TEST( VectorBinaryOperationsTest, max ) EXPECT_EQ( TNL::max(L1 + L1, R1), R2 ); // with two expressions EXPECT_EQ( TNL::max(L1 - L1, R1 + R1), L2 ); + // with expression and scalar + EXPECT_EQ( TNL::max(L1 + L1, 1), L2 ); + EXPECT_EQ( TNL::max(L1 + L1, RightReal(1)), L2 ); + EXPECT_EQ( TNL::max(1, R1 + R1), L2 ); + EXPECT_EQ( TNL::max(LeftReal(1), R1 + R1), L2 ); } #if defined(HAVE_CUDA) && !defined(STATIC_VECTOR) diff --git 
a/src/UnitTests/Containers/VectorOfStaticVectorsTest.cpp b/src/UnitTests/Containers/VectorOfStaticVectorsTest.cpp index 859cf045c..1f5781818 100644 --- a/src/UnitTests/Containers/VectorOfStaticVectorsTest.cpp +++ b/src/UnitTests/Containers/VectorOfStaticVectorsTest.cpp @@ -1,5 +1 @@ -#define VECTOR_OF_STATIC_VECTORS -#include "VectorBinaryOperationsTest.h" -#include "VectorUnaryOperationsTest.h" -#include "VectorVerticalOperationsTest.h" -#include "../main.h" +#include "VectorOfStaticVectorsTest.h" diff --git a/src/UnitTests/Containers/VectorOfStaticVectorsTest.cu b/src/UnitTests/Containers/VectorOfStaticVectorsTest.cu index 859cf045c..1f5781818 100644 --- a/src/UnitTests/Containers/VectorOfStaticVectorsTest.cu +++ b/src/UnitTests/Containers/VectorOfStaticVectorsTest.cu @@ -1,5 +1 @@ -#define VECTOR_OF_STATIC_VECTORS -#include "VectorBinaryOperationsTest.h" -#include "VectorUnaryOperationsTest.h" -#include "VectorVerticalOperationsTest.h" -#include "../main.h" +#include "VectorOfStaticVectorsTest.h" diff --git a/src/UnitTests/Containers/VectorOfStaticVectorsTest.h b/src/UnitTests/Containers/VectorOfStaticVectorsTest.h new file mode 100644 index 000000000..859cf045c --- /dev/null +++ b/src/UnitTests/Containers/VectorOfStaticVectorsTest.h @@ -0,0 +1,5 @@ +#define VECTOR_OF_STATIC_VECTORS +#include "VectorBinaryOperationsTest.h" +#include "VectorUnaryOperationsTest.h" +#include "VectorVerticalOperationsTest.h" +#include "../main.h" -- GitLab From ac818bfa153661726a147ba897a2795da3e4861b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Thu, 9 Jul 2020 22:10:31 +0200 Subject: [PATCH 11/15] Added static assert to the function Cuda::getSharedMemory --- src/TNL/Cuda/SharedMemory.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/TNL/Cuda/SharedMemory.h b/src/TNL/Cuda/SharedMemory.h index 29851952c..915bbbf9b 100644 --- a/src/TNL/Cuda/SharedMemory.h +++ b/src/TNL/Cuda/SharedMemory.h @@ -116,6 +116,8 @@ struct SharedMemory< T, 64 > template< 
typename T > __device__ inline T* getSharedMemory() { + static_assert( sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8, + "Requested type has unsupported size." ); return SharedMemory< T >{}; } -- GitLab From d13fa1f1764c5591bde9b493395111579260be62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Fri, 10 Jul 2020 00:07:41 +0200 Subject: [PATCH 12/15] Changed CudaReductionKernel to use static instead of dynamic shared memory Fixes #78 --- src/TNL/Algorithms/CudaReductionKernel.h | 73 ++++++++++++------------ 1 file changed, 35 insertions(+), 38 deletions(-) diff --git a/src/TNL/Algorithms/CudaReductionKernel.h b/src/TNL/Algorithms/CudaReductionKernel.h index e495e4375..5e7af9731 100644 --- a/src/TNL/Algorithms/CudaReductionKernel.h +++ b/src/TNL/Algorithms/CudaReductionKernel.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -54,11 +53,15 @@ CudaReductionKernel( const Result zero, const Index end, Result* output ) { - Result* sdata = Cuda::getSharedMemory< Result >(); + TNL_ASSERT_EQ( blockDim.x, blockSize, "unexpected block size in CudaReductionKernel" ); + // when there is only one warp per blockSize.x, we need to allocate two warps + // worth of shared memory so that we don't index shared memory out of bounds + constexpr int shmemElements = (blockSize <= 32) ? 2 * blockSize : blockSize; + __shared__ Result sdata[shmemElements]; // Get the thread id (tid), global thread id (gid) and gridSize. const Index tid = threadIdx.x; - Index gid = begin + blockIdx.x * blockDim. 
x + threadIdx.x; + Index gid = begin + blockIdx.x * blockDim.x + threadIdx.x; const Index gridSize = blockDim.x * gridDim.x; sdata[ tid ] = zero; @@ -150,12 +153,16 @@ CudaReductionWithArgumentKernel( const Result zero, Index* idxOutput, const Index* idxInput = nullptr ) { - Result* sdata = Cuda::getSharedMemory< Result >(); - Index* sidx = reinterpret_cast< Index* >( &sdata[ blockDim.x ] ); + TNL_ASSERT_EQ( blockDim.x, blockSize, "unexpected block size in CudaReductionKernel" ); + // when there is only one warp per blockSize.x, we need to allocate two warps + // worth of shared memory so that we don't index shared memory out of bounds + constexpr int shmemElements = (blockSize <= 32) ? 2 * blockSize : blockSize; + __shared__ Result sdata[shmemElements]; + __shared__ Index sidx[shmemElements]; // Get the thread id (tid), global thread id (gid) and gridSize. const Index tid = threadIdx.x; - Index gid = begin + blockIdx.x * blockDim. x + threadIdx.x; + Index gid = begin + blockIdx.x * blockDim.x + threadIdx.x; const Index gridSize = blockDim.x * gridDim.x; // Start with the sequential reduction and push the result into the shared memory. @@ -409,12 +416,6 @@ struct CudaReductionKernelLauncher blockSize.x = Reduction_maxThreadsPerBlock; gridSize.x = TNL::min( Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize ); - // when there is only one warp per blockSize.x, we need to allocate two warps - // worth of shared memory so that we don't index shared memory out of bounds - const Index shmem = (blockSize.x <= 32) - ? 2 * blockSize.x * sizeof( Result ) - : blockSize.x * sizeof( Result ); - // This is "general", but this method always sets blockSize.x to a specific value, // so runtime switch is not necessary - it only prolongs the compilation time. 
/* @@ -423,55 +424,55 @@ struct CudaReductionKernelLauncher { case 512: CudaReductionKernel< 512 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output); break; case 256: cudaFuncSetCacheConfig(CudaReductionKernel< 256, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared); CudaReductionKernel< 256 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output); break; case 128: cudaFuncSetCacheConfig(CudaReductionKernel< 128, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared); CudaReductionKernel< 128 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output); break; case 64: cudaFuncSetCacheConfig(CudaReductionKernel< 64, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared); CudaReductionKernel< 64 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output); break; case 32: cudaFuncSetCacheConfig(CudaReductionKernel< 32, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared); CudaReductionKernel< 32 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output); break; case 16: cudaFuncSetCacheConfig(CudaReductionKernel< 16, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared); CudaReductionKernel< 16 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output); break; case 8: cudaFuncSetCacheConfig(CudaReductionKernel< 8, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared); 
CudaReductionKernel< 8 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output); break; case 4: cudaFuncSetCacheConfig(CudaReductionKernel< 4, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared); CudaReductionKernel< 4 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output); break; case 2: cudaFuncSetCacheConfig(CudaReductionKernel< 2, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared); CudaReductionKernel< 2 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output); break; case 1: TNL_ASSERT( false, std::cerr << "blockSize should not be 1." << std::endl ); @@ -486,8 +487,9 @@ struct CudaReductionKernelLauncher if( blockSize.x == Reduction_maxThreadsPerBlock ) { cudaFuncSetCacheConfig(CudaReductionKernel< Reduction_maxThreadsPerBlock, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared); + // shared memory is allocated statically inside the kernel CudaReductionKernel< Reduction_maxThreadsPerBlock > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, begin, end, output); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, begin, end, output); cudaStreamSynchronize(0); TNL_CHECK_CUDA_DEVICE; } @@ -519,12 +521,6 @@ struct CudaReductionKernelLauncher blockSize.x = Reduction_maxThreadsPerBlock; gridSize.x = TNL::min( Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize ); - // when there is only one warp per blockSize.x, we need to allocate two warps - // worth of shared memory so that we don't index shared memory out of bounds - const Index shmem = (blockSize.x <= 32) - ? 
2 * blockSize.x * ( sizeof( Result ) + sizeof( Index ) ) - : blockSize.x * ( sizeof( Result ) + sizeof( Index ) ); - // This is "general", but this method always sets blockSize.x to a specific value, // so runtime switch is not necessary - it only prolongs the compilation time. /* @@ -533,55 +529,55 @@ struct CudaReductionKernelLauncher { case 512: CudaReductionWithArgumentKernel< 512 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); break; case 256: cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< 256, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared); CudaReductionWithArgumentKernel< 256 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); break; case 128: cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< 128, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared); CudaReductionWithArgumentKernel< 128 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); break; case 64: cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< 64, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared); CudaReductionWithArgumentKernel< 64 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); break; case 32: cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< 32, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared); CudaReductionWithArgumentKernel< 32 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, 
reduction, size, output, idxOutput, idxInput ); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); break; case 16: cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< 16, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared); CudaReductionWithArgumentKernel< 16 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); break; case 8: cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< 8, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared); CudaReductionWithArgumentKernel< 8 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); break; case 4: cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< 4, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared); CudaReductionWithArgumentKernel< 4 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); break; case 2: cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< 2, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared); CudaReductionWithArgumentKernel< 2 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); break; case 1: TNL_ASSERT( false, std::cerr << "blockSize should not be 1." 
<< std::endl ); @@ -596,8 +592,9 @@ struct CudaReductionKernelLauncher if( blockSize.x == Reduction_maxThreadsPerBlock ) { cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< Reduction_maxThreadsPerBlock, Result, DataFetcher, Reduction, Index >, cudaFuncCachePreferShared); + // shared memory is allocated statically inside the kernel CudaReductionWithArgumentKernel< Reduction_maxThreadsPerBlock > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, begin, end, output, idxOutput, idxInput ); + <<< gridSize, blockSize >>>( zero, dataFetcher, reduction, begin, end, output, idxOutput, idxInput ); cudaStreamSynchronize(0); TNL_CHECK_CUDA_DEVICE; } -- GitLab From e741aa95e781838e2bc659ad113cc033a2afa5f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Fri, 10 Jul 2020 13:13:34 +0200 Subject: [PATCH 13/15] Added CudaReductionFunctorWrapper to avoid nvcc's fucked up error --- src/TNL/Algorithms/CudaReductionKernel.h | 53 ++++++++++++++++-------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/src/TNL/Algorithms/CudaReductionKernel.h b/src/TNL/Algorithms/CudaReductionKernel.h index 5e7af9731..4519678bd 100644 --- a/src/TNL/Algorithms/CudaReductionKernel.h +++ b/src/TNL/Algorithms/CudaReductionKernel.h @@ -39,6 +39,25 @@ static constexpr int Reduction_registersPerThread = 32; // empirically determi static constexpr int Reduction_minBlocksPerMultiprocessor = 8; #endif +/* + * nvcc (as of 10.2) is totally fucked up, in some cases it does not recognize the + * std::plus::operator() function to be constexpr and hence __host__ __device__ + * (for example, when the arguments are StaticVector<3, double> etc). Hence, we use + * this wrapper which triggers only a warning and not an error as is the case when + * the reduction functor is called from a __global__ or __device__ function. Let's + * hope it works otherwise... 
+ */ +template< typename Reduction, typename Arg1, typename Arg2 > +__host__ __device__ +auto CudaReductionFunctorWrapper( Reduction&& reduction, Arg1&& arg1, Arg2&& arg2 ) +{ +// let's suppress the aforementioned warning... +#pragma push +#pragma diag_suppress 2979 + return std::forward(reduction)( std::forward(arg1), std::forward(arg2) ); +#pragma pop +} + template< int blockSize, typename Result, typename DataFetcher, @@ -68,19 +87,19 @@ CudaReductionKernel( const Result zero, // Start with the sequential reduction and push the result into the shared memory. while( gid + 4 * gridSize < end ) { - sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid ) ); - sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid + gridSize ) ); - sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid + 2 * gridSize ) ); - sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid + 3 * gridSize ) ); + sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], dataFetcher( gid ) ); + sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], dataFetcher( gid + gridSize ) ); + sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], dataFetcher( gid + 2 * gridSize ) ); + sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], dataFetcher( gid + 3 * gridSize ) ); gid += 4 * gridSize; } while( gid + 2 * gridSize < end ) { - sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid ) ); - sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid + gridSize ) ); + sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], dataFetcher( gid ) ); + sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], dataFetcher( gid + gridSize ) ); gid += 2 * gridSize; } while( gid < end ) { - sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid ) ); + sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], dataFetcher( gid ) ); gid += gridSize; } __syncthreads(); @@ -88,48 +107,48 @@ CudaReductionKernel( const 
Result zero, // Perform the parallel reduction. if( blockSize >= 1024 ) { if( tid < 512 ) - sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 512 ] ); + sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 512 ] ); __syncthreads(); } if( blockSize >= 512 ) { if( tid < 256 ) - sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 256 ] ); + sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 256 ] ); __syncthreads(); } if( blockSize >= 256 ) { if( tid < 128 ) - sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 128 ] ); + sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 128 ] ); __syncthreads(); } if( blockSize >= 128 ) { if( tid < 64 ) - sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 64 ] ); + sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 64 ] ); __syncthreads(); } // This runs in one warp so we use __syncwarp() instead of __syncthreads(). if( tid < 32 ) { if( blockSize >= 64 ) - sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 32 ] ); + sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 32 ] ); __syncwarp(); // Note that here we do not have to check if tid < 16 etc, because we have // 2 * blockSize.x elements of shared memory per block, so we do not // access out of bounds. The results for the upper half will be undefined, // but unused anyway. 
if( blockSize >= 32 ) - sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 16 ] ); + sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 16 ] ); __syncwarp(); if( blockSize >= 16 ) - sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 8 ] ); + sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 8 ] ); __syncwarp(); if( blockSize >= 8 ) - sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 4 ] ); + sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 4 ] ); __syncwarp(); if( blockSize >= 4 ) - sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 2 ] ); + sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 2 ] ); __syncwarp(); if( blockSize >= 2 ) - sdata[ tid ] = reduction( sdata[ tid ], sdata[ tid + 1 ] ); + sdata[ tid ] = CudaReductionFunctorWrapper( reduction, sdata[ tid ], sdata[ tid + 1 ] ); } // Store the result back in the global memory. -- GitLab From 8606d15edb7f8599faba0c20e78705c2509c2583 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Fri, 10 Jul 2020 14:08:37 +0200 Subject: [PATCH 14/15] Fixed VectorAssignmentWithOperation and changed types for VectorBinaryOperationsTest to cover operands with different types --- src/TNL/Containers/detail/VectorAssignment.h | 8 +- .../Containers/VectorBinaryOperationsTest.h | 102 +++++++++--------- 2 files changed, 55 insertions(+), 55 deletions(-) diff --git a/src/TNL/Containers/detail/VectorAssignment.h b/src/TNL/Containers/detail/VectorAssignment.h index 5f273d5f0..5a36d971c 100644 --- a/src/TNL/Containers/detail/VectorAssignment.h +++ b/src/TNL/Containers/detail/VectorAssignment.h @@ -120,22 +120,22 @@ struct VectorAssignmentWithOperation< Vector, T, true, true > { static void addition( Vector& v, const T& t ) { - VectorAssignmentWithOperation< Vector, typename Vector::ConstViewType >::addition( v, t.getConstView() ); + VectorAssignmentWithOperation< Vector, typename 
T::ConstViewType >::addition( v, t.getConstView() ); } static void subtraction( Vector& v, const T& t ) { - VectorAssignmentWithOperation< Vector, typename Vector::ConstViewType >::subtraction( v, t.getConstView() ); + VectorAssignmentWithOperation< Vector, typename T::ConstViewType >::subtraction( v, t.getConstView() ); } static void multiplication( Vector& v, const T& t ) { - VectorAssignmentWithOperation< Vector, typename Vector::ConstViewType >::multiplication( v, t.getConstView() ); + VectorAssignmentWithOperation< Vector, typename T::ConstViewType >::multiplication( v, t.getConstView() ); } static void division( Vector& v, const T& t ) { - VectorAssignmentWithOperation< Vector, typename Vector::ConstViewType >::subtraction( v, t.getConstView() ); + VectorAssignmentWithOperation< Vector, typename T::ConstViewType >::subtraction( v, t.getConstView() ); } }; diff --git a/src/UnitTests/Containers/VectorBinaryOperationsTest.h b/src/UnitTests/Containers/VectorBinaryOperationsTest.h index 27c4c46e9..e438d649f 100644 --- a/src/UnitTests/Containers/VectorBinaryOperationsTest.h +++ b/src/UnitTests/Containers/VectorBinaryOperationsTest.h @@ -147,62 +147,62 @@ protected: #if defined(DISTRIBUTED_VECTOR) using VectorPairs = ::testing::Types< #ifndef HAVE_CUDA - Pair< DistributedVector< double, Devices::Host, int, Communicators::MpiCommunicator >, - DistributedVector< double, Devices::Host, int, Communicators::MpiCommunicator > >, - Pair< DistributedVector< double, Devices::Host, int, Communicators::MpiCommunicator >, - DistributedVectorView< double, Devices::Host, int, Communicators::MpiCommunicator > >, - Pair< DistributedVectorView< double, Devices::Host, int, Communicators::MpiCommunicator >, - DistributedVector< double, Devices::Host, int, Communicators::MpiCommunicator > >, - Pair< DistributedVectorView< double, Devices::Host, int, Communicators::MpiCommunicator >, - DistributedVectorView< double, Devices::Host, int, Communicators::MpiCommunicator > >, - - Pair< 
DistributedVector< double, Devices::Host, int, Communicators::NoDistrCommunicator >, - DistributedVector< double, Devices::Host, int, Communicators::NoDistrCommunicator > >, - Pair< DistributedVector< double, Devices::Host, int, Communicators::NoDistrCommunicator >, - DistributedVectorView< double, Devices::Host, int, Communicators::NoDistrCommunicator > >, - Pair< DistributedVectorView< double, Devices::Host, int, Communicators::NoDistrCommunicator >, - DistributedVector< double, Devices::Host, int, Communicators::NoDistrCommunicator > >, - Pair< DistributedVectorView< double, Devices::Host, int, Communicators::NoDistrCommunicator >, - DistributedVectorView< double, Devices::Host, int, Communicators::NoDistrCommunicator > > + Pair< DistributedVector< int, Devices::Host, int, Communicators::MpiCommunicator >, + DistributedVector< short, Devices::Host, int, Communicators::MpiCommunicator > >, + Pair< DistributedVector< int, Devices::Host, int, Communicators::MpiCommunicator >, + DistributedVectorView< short, Devices::Host, int, Communicators::MpiCommunicator > >, + Pair< DistributedVectorView< int, Devices::Host, int, Communicators::MpiCommunicator >, + DistributedVector< short, Devices::Host, int, Communicators::MpiCommunicator > >, + Pair< DistributedVectorView< int, Devices::Host, int, Communicators::MpiCommunicator >, + DistributedVectorView< short, Devices::Host, int, Communicators::MpiCommunicator > >, + + Pair< DistributedVector< int, Devices::Host, int, Communicators::NoDistrCommunicator >, + DistributedVector< short, Devices::Host, int, Communicators::NoDistrCommunicator > >, + Pair< DistributedVector< int, Devices::Host, int, Communicators::NoDistrCommunicator >, + DistributedVectorView< short, Devices::Host, int, Communicators::NoDistrCommunicator > >, + Pair< DistributedVectorView< int, Devices::Host, int, Communicators::NoDistrCommunicator >, + DistributedVector< short, Devices::Host, int, Communicators::NoDistrCommunicator > >, + Pair< 
DistributedVectorView< int, Devices::Host, int, Communicators::NoDistrCommunicator >, + DistributedVectorView< short, Devices::Host, int, Communicators::NoDistrCommunicator > > #else - Pair< DistributedVector< double, Devices::Cuda, int, Communicators::MpiCommunicator >, - DistributedVector< double, Devices::Cuda, int, Communicators::MpiCommunicator > >, - Pair< DistributedVector< double, Devices::Cuda, int, Communicators::MpiCommunicator >, - DistributedVectorView< double, Devices::Cuda, int, Communicators::MpiCommunicator > >, - Pair< DistributedVectorView< double, Devices::Cuda, int, Communicators::MpiCommunicator >, - DistributedVector< double, Devices::Cuda, int, Communicators::MpiCommunicator > >, - Pair< DistributedVectorView< double, Devices::Cuda, int, Communicators::MpiCommunicator >, - DistributedVectorView< double, Devices::Cuda, int, Communicators::MpiCommunicator > >, - Pair< DistributedVector< double, Devices::Cuda, int, Communicators::NoDistrCommunicator >, - DistributedVector< double, Devices::Cuda, int, Communicators::NoDistrCommunicator > >, - Pair< DistributedVector< double, Devices::Cuda, int, Communicators::NoDistrCommunicator >, - DistributedVectorView< double, Devices::Cuda, int, Communicators::NoDistrCommunicator > >, - Pair< DistributedVectorView< double, Devices::Cuda, int, Communicators::NoDistrCommunicator >, - DistributedVector< double, Devices::Cuda, int, Communicators::NoDistrCommunicator > >, - Pair< DistributedVectorView< double, Devices::Cuda, int, Communicators::NoDistrCommunicator >, - DistributedVectorView< double, Devices::Cuda, int, Communicators::NoDistrCommunicator > > + Pair< DistributedVector< int, Devices::Cuda, int, Communicators::MpiCommunicator >, + DistributedVector< short, Devices::Cuda, int, Communicators::MpiCommunicator > >, + Pair< DistributedVector< int, Devices::Cuda, int, Communicators::MpiCommunicator >, + DistributedVectorView< short, Devices::Cuda, int, Communicators::MpiCommunicator > >, + Pair< 
DistributedVectorView< int, Devices::Cuda, int, Communicators::MpiCommunicator >, + DistributedVector< short, Devices::Cuda, int, Communicators::MpiCommunicator > >, + Pair< DistributedVectorView< int, Devices::Cuda, int, Communicators::MpiCommunicator >, + DistributedVectorView< short, Devices::Cuda, int, Communicators::MpiCommunicator > >, + Pair< DistributedVector< int, Devices::Cuda, int, Communicators::NoDistrCommunicator >, + DistributedVector< short, Devices::Cuda, int, Communicators::NoDistrCommunicator > >, + Pair< DistributedVector< int, Devices::Cuda, int, Communicators::NoDistrCommunicator >, + DistributedVectorView< short, Devices::Cuda, int, Communicators::NoDistrCommunicator > >, + Pair< DistributedVectorView< int, Devices::Cuda, int, Communicators::NoDistrCommunicator >, + DistributedVector< short, Devices::Cuda, int, Communicators::NoDistrCommunicator > >, + Pair< DistributedVectorView< int, Devices::Cuda, int, Communicators::NoDistrCommunicator >, + DistributedVectorView< short, Devices::Cuda, int, Communicators::NoDistrCommunicator > > #endif >; #elif defined(STATIC_VECTOR) #ifdef VECTOR_OF_STATIC_VECTORS using VectorPairs = ::testing::Types< - Pair< StaticVector< 1, StaticVector< 3, double > >, StaticVector< 1, StaticVector< 3, double > > >, - Pair< StaticVector< 2, StaticVector< 3, double > >, StaticVector< 2, StaticVector< 3, double > > >, - Pair< StaticVector< 3, StaticVector< 3, double > >, StaticVector< 3, StaticVector< 3, double > > >, - Pair< StaticVector< 4, StaticVector< 3, double > >, StaticVector< 4, StaticVector< 3, double > > >, - Pair< StaticVector< 5, StaticVector< 3, double > >, StaticVector< 5, StaticVector< 3, double > > > + Pair< StaticVector< 1, StaticVector< 3, int > >, StaticVector< 1, StaticVector< 3, short > > >, + Pair< StaticVector< 2, StaticVector< 3, int > >, StaticVector< 2, StaticVector< 3, short > > >, + Pair< StaticVector< 3, StaticVector< 3, int > >, StaticVector< 3, StaticVector< 3, short > > >, + Pair< 
StaticVector< 4, StaticVector< 3, int > >, StaticVector< 4, StaticVector< 3, short > > >, + Pair< StaticVector< 5, StaticVector< 3, int > >, StaticVector< 5, StaticVector< 3, short > > > >; #else using VectorPairs = ::testing::Types< - Pair< StaticVector< 1, int >, StaticVector< 1, int > >, + Pair< StaticVector< 1, int >, StaticVector< 1, short > >, Pair< StaticVector< 1, double >, StaticVector< 1, double > >, - Pair< StaticVector< 2, int >, StaticVector< 2, int > >, + Pair< StaticVector< 2, int >, StaticVector< 2, short > >, Pair< StaticVector< 2, double >, StaticVector< 2, double > >, - Pair< StaticVector< 3, int >, StaticVector< 3, int > >, + Pair< StaticVector< 3, int >, StaticVector< 3, short > >, Pair< StaticVector< 3, double >, StaticVector< 3, double > >, - Pair< StaticVector< 4, int >, StaticVector< 4, int > >, + Pair< StaticVector< 4, int >, StaticVector< 4, short > >, Pair< StaticVector< 4, double >, StaticVector< 4, double > >, - Pair< StaticVector< 5, int >, StaticVector< 5, int > >, + Pair< StaticVector< 5, int >, StaticVector< 5, short > >, Pair< StaticVector< 5, double >, StaticVector< 5, double > > >; #endif @@ -210,15 +210,15 @@ protected: #ifdef VECTOR_OF_STATIC_VECTORS using VectorPairs = ::testing::Types< #ifndef HAVE_CUDA - Pair< Vector< StaticVector< 3, double >, Devices::Host >, Vector< StaticVector< 3, double >, Devices::Host > >, - Pair< VectorView< StaticVector< 3, double >, Devices::Host >, Vector< StaticVector< 3, double >, Devices::Host > >, - Pair< Vector< StaticVector< 3, double >, Devices::Host >, VectorView< StaticVector< 3, double >, Devices::Host > >, - Pair< VectorView< StaticVector< 3, double >, Devices::Host >, VectorView< StaticVector< 3, double >, Devices::Host > > + Pair< Vector< StaticVector< 3, int >, Devices::Host >, Vector< StaticVector< 3, short >, Devices::Host > >, + Pair< VectorView< StaticVector< 3, int >, Devices::Host >, Vector< StaticVector< 3, short >, Devices::Host > >, + Pair< Vector< StaticVector< 3, int >, 
Devices::Host >, VectorView< StaticVector< 3, short >, Devices::Host > >, + Pair< VectorView< StaticVector< 3, int >, Devices::Host >, VectorView< StaticVector< 3, short >, Devices::Host > > #else - Pair< Vector< StaticVector< 3, double >, Devices::Cuda >, Vector< StaticVector< 3, double >, Devices::Cuda > >, - Pair< VectorView< StaticVector< 3, double >, Devices::Cuda >, Vector< StaticVector< 3, double >, Devices::Cuda > >, - Pair< Vector< StaticVector< 3, double >, Devices::Cuda >, VectorView< StaticVector< 3, double >, Devices::Cuda > >, - Pair< VectorView< StaticVector< 3, double >, Devices::Cuda >, VectorView< StaticVector< 3, double >, Devices::Cuda > > + Pair< Vector< StaticVector< 3, int >, Devices::Cuda >, Vector< StaticVector< 3, short >, Devices::Cuda > >, + Pair< VectorView< StaticVector< 3, int >, Devices::Cuda >, Vector< StaticVector< 3, short >, Devices::Cuda > >, + Pair< Vector< StaticVector< 3, int >, Devices::Cuda >, VectorView< StaticVector< 3, short >, Devices::Cuda > >, + Pair< VectorView< StaticVector< 3, int >, Devices::Cuda >, VectorView< StaticVector< 3, short >, Devices::Cuda > > #endif >; #else -- GitLab From bacbfb842066fa943e6efed85915571473ae5a98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Fri, 10 Jul 2020 14:21:50 +0200 Subject: [PATCH 15/15] Removed unnecessary workaround for pybind11's format_descriptor --- src/Python/pytnl/tnl/Array.h | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/src/Python/pytnl/tnl/Array.h b/src/Python/pytnl/tnl/Array.h index b39fe6b0e..5d404f53b 100644 --- a/src/Python/pytnl/tnl/Array.h +++ b/src/Python/pytnl/tnl/Array.h @@ -4,25 +4,14 @@ #include namespace py = pybind11; +// including pybind11/numpy.h is needed for the specializations of py::format_descriptor +// for enum types, see https://github.com/pybind/pybind11/issues/2135 +#include <pybind11/numpy.h> + #include "../tnl_indexing.h" #include - -// pybind11 should actually take care of this inside
py::format_descriptor, but apparently it does not work... -// see https://github.com/pybind/pybind11/issues/2135 -template< typename T, typename = void > -struct underlying_type -{ - using type = T; -}; -template< typename T > -struct underlying_type< T, std::enable_if_t< std::is_enum< T >::value > > -{ - using type = std::underlying_type_t< T >; -}; - - template< typename ArrayType > void export_Array(py::module & m, const char* name) { @@ -64,9 +53,7 @@ void export_Array(py::module & m, const char* name) // Size of one scalar sizeof( typename ArrayType::ValueType ), // Python struct-style format descriptor - py::format_descriptor< - typename underlying_type< typename ArrayType::ValueType >::type - >::format(), + py::format_descriptor< typename ArrayType::ValueType >::format(), // Number of dimensions 1, // Buffer dimensions -- GitLab