From 34680051a3fda10d1a4d889e517ba565d123f806 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Wed, 17 Apr 2019 21:54:55 +0200 Subject: [PATCH 01/93] Adding expression templates. --- .../Containers/Algorithms/ArrayAssignment.h | 6 +- .../Containers/Algorithms/VectorAssignment.h | 87 ++++++++ src/TNL/Containers/StaticArray.h | 59 +++--- src/TNL/Containers/StaticArray1D_impl.h | 16 +- src/TNL/Containers/StaticArray2D_impl.h | 16 +- src/TNL/Containers/StaticArray3D_impl.h | 16 +- src/TNL/Containers/StaticArray_impl.h | 16 +- src/TNL/Containers/StaticVector.h | 62 ++++-- src/TNL/Containers/StaticVector1D_impl.h | 14 +- src/TNL/Containers/StaticVector2D_impl.h | 17 +- src/TNL/Containers/StaticVector3D_impl.h | 19 +- src/TNL/Containers/StaticVector_impl.h | 12 +- src/TNL/Containers/VectorView.h | 5 + src/TNL/Containers/VectorView_impl.h | 55 +++++ src/UnitTests/CMakeLists.txt | 8 + .../ExpressionTemplatesDynamicTest.cpp | 194 ++++++++++++++++++ .../ExpressionTemplatesStaticTest.cpp | 181 ++++++++++++++++ 17 files changed, 689 insertions(+), 94 deletions(-) create mode 100644 src/TNL/Containers/Algorithms/VectorAssignment.h create mode 100644 src/UnitTests/ExpressionTemplatesDynamicTest.cpp create mode 100644 src/UnitTests/ExpressionTemplatesStaticTest.cpp diff --git a/src/TNL/Containers/Algorithms/ArrayAssignment.h b/src/TNL/Containers/Algorithms/ArrayAssignment.h index 7f5b8482e..73df45c25 100644 --- a/src/TNL/Containers/Algorithms/ArrayAssignment.h +++ b/src/TNL/Containers/Algorithms/ArrayAssignment.h @@ -1,8 +1,8 @@ /*************************************************************************** - ArrayOperations.h - description + ArrayAssignment.h - description ------------------- - begin : Jul 15, 2013 - copyright : (C) 2013 by Tomas Oberhuber + begin : Apr 4, 2019 + copyright : (C) 2019 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ diff --git 
a/src/TNL/Containers/Algorithms/VectorAssignment.h b/src/TNL/Containers/Algorithms/VectorAssignment.h new file mode 100644 index 000000000..f654b0ee7 --- /dev/null +++ b/src/TNL/Containers/Algorithms/VectorAssignment.h @@ -0,0 +1,89 @@ +/*************************************************************************** + VectorAssignment.h - description + ------------------- + begin : Apr 4, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include + +namespace TNL { +namespace Containers { +namespace Algorithms { + +namespace Details { +/** + * SFINAE for checking if T has a subscript operator. Only the well-formedness + * of t[ 0 ] is tested; the type returned by operator[] is irrelevant, so + * operators returning a non-const reference are detected as well. + */ +template< typename T > +class HasSubscriptOperator +{ +private: + typedef char YesType[1]; + typedef char NoType[2]; + + template< typename C > static YesType& test( decltype( (void) std::declval< C >()[ 0 ], 0 ) ); + template< typename C > static NoType& test(...); + +public: + static constexpr bool value = ( sizeof( test< T >(0) ) == sizeof( YesType ) ); +}; +} // namespace Details + +template< typename Vector, + typename T, + bool hasSubscriptOperator = Details::HasSubscriptOperator< T >::value > +struct VectorAssignment{}; + +/** + * \brief Specialization for assignment with subscript operator + */ +template< typename Vector, + typename T > +struct VectorAssignment< Vector, T, true > +{ + static void resize( Vector& v, const T& t ) + { + v.setSize( t.getSize() ); + } + + static void assign( Vector& v, const T& t ) + { + TNL_ASSERT_EQ( v.getSize(), t.getSize(), "The sizes of the vectors must be equal." ); + for( decltype( v.getSize() ) i = 0; i < v.getSize(); i ++ ) + v[ i ] = t[ i ]; + }; +}; + +/** + * \brief Specialization for array-value assignment for other types. We assume + * that T is convertible to Vector::ValueType. 
+ */ +template< typename Vector, + typename T > +struct VectorAssignment< Vector, T, false > +{ + static void resize( Vector& v, const T& t ) + { /* intentionally empty - a non-subscriptable T is treated as a single value */ + }; + + static void assign( Vector& v, const T& t ) + { + TNL_ASSERT_GT( v.getSize(), 0, "Cannot assign value to empty vector." ); + for( decltype( v.getSize() ) i = 0; i < v.getSize(); i ++ ) + v[ i ] = t; + }; +}; + +} // namespace Algorithms +} // namespace Containers +} // namespace TNL diff --git a/src/TNL/Containers/StaticArray.h b/src/TNL/Containers/StaticArray.h index 724e5d455..37392be78 100644 --- a/src/TNL/Containers/StaticArray.h +++ b/src/TNL/Containers/StaticArray.h @@ -8,13 +8,13 @@ /* See Copyright Notice in tnl/Copyright */ -#pragma once +#pragma once #include #include namespace TNL { -namespace Containers { +namespace Containers { /** * \brief Array with constant size. @@ -30,6 +30,12 @@ class StaticArray typedef int IndexType; enum { size = Size }; + /** + * \brief Gets size of this array. + */ + __cuda_callable__ + static constexpr int getSize(); + /** * \brief Basic constructor. * @@ -72,11 +78,6 @@ class StaticArray */ static String getType(); - /** - * \brief Gets size of this array. - */ - __cuda_callable__ - inline int getSize() const; /** * \brief Gets all data of this static array. @@ -136,7 +137,7 @@ class StaticArray template< typename Array > __cuda_callable__ inline bool operator != ( const Array& array ) const; - + template< typename OtherValue > __cuda_callable__ operator StaticArray< Size, OtherValue >() const; @@ -163,7 +164,7 @@ class StaticArray * \brief Sorts the elements in this static array into ascending order. */ void sort(); - + /** * \brief Writes the array values into stream \e str with specified \e separator. * @@ -188,6 +189,12 @@ class StaticArray< 1, Value > typedef int IndexType; enum { size = 1 }; + /** + * \brief Gets size of this array. 
+ */ + __cuda_callable__ + static constexpr int getSize(); + /** \brief See StaticArray::StaticArray().*/ __cuda_callable__ inline StaticArray(); @@ -210,10 +217,6 @@ class StaticArray< 1, Value > /** \brief See StaticArray::getType().*/ static String getType(); - /** \brief See StaticArray::getSize().*/ - __cuda_callable__ - inline int getSize() const; - /** \brief See StaticArray::getData().*/ __cuda_callable__ inline Value* getData(); @@ -256,7 +259,7 @@ class StaticArray< 1, Value > template< typename Array > __cuda_callable__ inline bool operator != ( const Array& array ) const; - + template< typename OtherValue > __cuda_callable__ operator StaticArray< 1, OtherValue >() const; @@ -274,7 +277,7 @@ class StaticArray< 1, Value > /** \brief See StaticArray::sort().*/ void sort(); - + /** \brief See StaticArray::write().*/ std::ostream& write( std::ostream& str, const char* separator = " " ) const; @@ -293,6 +296,12 @@ class StaticArray< 2, Value > typedef int IndexType; enum { size = 2 }; + /** + * \brief Gets size of this array. + */ + __cuda_callable__ + static constexpr int getSize(); + /** \brief See StaticArray::StaticArray().*/ __cuda_callable__ inline StaticArray(); @@ -324,10 +333,6 @@ class StaticArray< 2, Value > /** \brief See StaticArray::getType().*/ static String getType(); - /** \brief See StaticArray::getSize().*/ - __cuda_callable__ - inline int getSize() const; - /** \brief See StaticArray::getData().*/ __cuda_callable__ inline Value* getData(); @@ -378,11 +383,11 @@ class StaticArray< 2, Value > template< typename Array > __cuda_callable__ inline bool operator != ( const Array& array ) const; - + template< typename OtherValue > __cuda_callable__ operator StaticArray< 2, OtherValue >() const; - + /** \brief See StaticArray::setValue().*/ __cuda_callable__ inline void setValue( const ValueType& val ); @@ -414,6 +419,12 @@ class StaticArray< 3, Value > typedef int IndexType; enum { size = 3 }; + /** + * \brief Gets size of this array. 
+ */ + __cuda_callable__ + static constexpr int getSize(); + /** \brief See StaticArray::StaticArray().*/ __cuda_callable__ inline StaticArray(); @@ -446,10 +457,6 @@ class StaticArray< 3, Value > /** \brief See StaticArray::getType().*/ static String getType(); - /** \brief See StaticArray::getSize().*/ - __cuda_callable__ - inline int getSize() const; - /** \brief See StaticArray::getData().*/ __cuda_callable__ inline Value* getData(); @@ -508,7 +515,7 @@ class StaticArray< 3, Value > template< typename Array > __cuda_callable__ inline bool operator != ( const Array& array ) const; - + template< typename OtherValue > __cuda_callable__ operator StaticArray< 3, OtherValue >() const; diff --git a/src/TNL/Containers/StaticArray1D_impl.h b/src/TNL/Containers/StaticArray1D_impl.h index 98963dcf3..403ddb8aa 100644 --- a/src/TNL/Containers/StaticArray1D_impl.h +++ b/src/TNL/Containers/StaticArray1D_impl.h @@ -14,7 +14,14 @@ #include namespace TNL { -namespace Containers { +namespace Containers { + +template< typename Value > +__cuda_callable__ +constexpr int StaticArray< 1, Value >::getSize() +{ + return size; /* enum constant, fixed to 1 in this specialization */ +} template< typename Value > __cuda_callable__ @@ -54,13 +61,6 @@ String StaticArray< 1, Value >::getType() String( " >" ); } -template< typename Value > -__cuda_callable__ -inline int StaticArray< 1, Value >::getSize() const -{ - return size; -} - template< typename Value > __cuda_callable__ inline Value* StaticArray< 1, Value >::getData() diff --git a/src/TNL/Containers/StaticArray2D_impl.h b/src/TNL/Containers/StaticArray2D_impl.h index 29dcbee59..da1d944bc 100644 --- a/src/TNL/Containers/StaticArray2D_impl.h +++ b/src/TNL/Containers/StaticArray2D_impl.h @@ -15,7 +15,14 @@ #include namespace TNL { -namespace Containers { +namespace Containers { + +template< typename Value > +__cuda_callable__ +constexpr int StaticArray< 2, Value >::getSize() +{ + return size; /* enum constant, fixed to 2 in this specialization */ 
+} template< typename Value > __cuda_callable__ @@ -66,13 +73,6 @@ String StaticArray< 2, Value 
>::getType() String( " >" ); } -template< typename Value > -__cuda_callable__ -inline int StaticArray< 2, Value >::getSize() const -{ - return size; -} - template< typename Value > __cuda_callable__ inline Value* StaticArray< 2, Value >::getData() diff --git a/src/TNL/Containers/StaticArray3D_impl.h b/src/TNL/Containers/StaticArray3D_impl.h index 69c1998a4..3e3ae820e 100644 --- a/src/TNL/Containers/StaticArray3D_impl.h +++ b/src/TNL/Containers/StaticArray3D_impl.h @@ -15,7 +15,14 @@ #include namespace TNL { -namespace Containers { +namespace Containers { + +template< typename Value > +__cuda_callable__ +constexpr int StaticArray< 3, Value >::getSize() +{ + return size; /* enum constant, fixed to 3 in this specialization */ +} template< typename Value > __cuda_callable__ @@ -70,13 +77,6 @@ String StaticArray< 3, Value >::getType() String( " >" ); } -template< typename Value > -__cuda_callable__ -inline int StaticArray< 3, Value >::getSize() const -{ - return size; -} - template< typename Value > __cuda_callable__ inline Value* StaticArray< 3, Value >::getData() diff --git a/src/TNL/Containers/StaticArray_impl.h b/src/TNL/Containers/StaticArray_impl.h index 9c7835ce8..4ff5d6b8f 100644 --- a/src/TNL/Containers/StaticArray_impl.h +++ b/src/TNL/Containers/StaticArray_impl.h @@ -15,7 +15,14 @@ #include namespace TNL { -namespace Containers { +namespace Containers { + +template< int Size, typename Value > +__cuda_callable__ +constexpr int StaticArray< Size, Value >::getSize() +{ + return size; /* enum constant equal to the Size template parameter */ +} template< int Size, typename Value > __cuda_callable__ @@ -58,13 +65,6 @@ String StaticArray< Size, Value >::getType() String( " >" ); } -template< int Size, typename Value > -__cuda_callable__ -inline int StaticArray< Size, Value >::getSize() const -{ - return size; -} - template< int Size, typename Value > __cuda_callable__ inline Value* StaticArray< Size, Value >::getData() diff --git a/src/TNL/Containers/StaticVector.h 
b/src/TNL/Containers/StaticVector.h index 6b7c941f2..ac0e9fe57 100644 --- a/src/TNL/Containers/StaticVector.h 
+++ b/src/TNL/Containers/StaticVector.h @@ -14,7 +14,7 @@ #include namespace TNL { -namespace Containers { +namespace Containers { /** * \brief Vector with constant size. @@ -29,6 +29,8 @@ class StaticVector : public StaticArray< Size, Real > typedef Real RealType; enum { size = Size }; + using StaticArray< Size, Real >::getSize; + /** * \brief Basic constructor. * @@ -73,13 +75,16 @@ class StaticVector : public StaticArray< Size, Real > * @param prefix Name of now parameter/prefix. */ bool setup( const Config::ParameterContainer& parameters, - const String& prefix = "" ); + const String& prefix = "" ); /** * \brief Gets type of this vector. */ static String getType(); + template< typename StaticVectorOperationType > + StaticVector& operator = ( const StaticVectorOperationType& vo ); /**< \brief Assigns \e vo to this vector; the definition forwards to Algorithms::VectorAssignment. */ + /** * \brief Adding operator. * @@ -109,7 +114,7 @@ class StaticVector : public StaticArray< Size, Real > */ __cuda_callable__ StaticVector& operator *= ( const Real& c ); - + /** * \brief Division by number * @@ -119,7 +124,7 @@ class StaticVector : public StaticArray< Size, Real > */ __cuda_callable__ StaticVector& operator /= ( const Real& c ); - + /** * \brief Addition operator. * @@ -143,7 +148,7 @@ class StaticVector : public StaticArray< Size, Real > /** * \brief Multiplication by number. * - * This function multipies this static vector by \e c and returns the resulting static vector. + * This function multiplies this static vector by \e c and returns the resulting static vector. * The addition is applied to all the vector elements separately. * \param c Multiplicator. 
*/ @@ -247,6 +252,8 @@ class StaticVector< 1, Real > : public StaticArray< 1, Real > typedef Real RealType; enum { size = 1 }; + using StaticArray< 1, Real >::getSize; + /** \brief See StaticVector::StaticVector().*/ __cuda_callable__ StaticVector(); @@ -265,13 +272,16 @@ class StaticVector< 1, Real > : public StaticArray< 1, Real > /** \brief See StaticVector::StaticVector( const StaticVector< Size, Real >& v ).*/ __cuda_callable__ StaticVector( const StaticVector< 1, Real >& v ); - + bool setup( const Config::ParameterContainer& parameters, - const String& prefix = "" ); + const String& prefix = "" ); /** \brief See StaticVector::getType().*/ static String getType(); + template< typename StaticVectorOperationType > + StaticVector& operator = ( const StaticVectorOperationType& vo ); /**< \brief Assigns \e vo to this vector; the definition forwards to Algorithms::VectorAssignment. */ + /** \brief See StaticVector::operator += ( const StaticVector& v ).*/ __cuda_callable__ StaticVector& operator += ( const StaticVector& v ); @@ -283,10 +293,10 @@ class StaticVector< 1, Real > : public StaticArray< 1, Real > /** \brief See StaticVector::operator *= ( const Real& c ).*/ __cuda_callable__ StaticVector& operator *= ( const Real& c ); - + /** \brief See StaticVector::operator /= ( const Real& c ).*/ __cuda_callable__ - StaticVector& operator /= ( const Real& c ); + StaticVector& operator /= ( const Real& c ); /** \brief See StaticVector::operator + ( const StaticVector& u ) const.*/ __cuda_callable__ @@ -330,7 +340,7 @@ class StaticVector< 1, Real > : public StaticArray< 1, Real > /** \brief See StaticVector::lpNorm( const Real& p ) const.*/ __cuda_callable__ - Real lpNorm( const Real& p ) const; + Real lpNorm( const Real& p ) const; #ifdef HAVE_MIC __cuda_callable__ @@ -360,6 +370,8 @@ class StaticVector< 2, Real > : public StaticArray< 2, Real > typedef Real RealType; enum { size = 2 }; + using StaticArray< 2, Real >::getSize; + /** \brief See StaticVector::StaticVector().*/ 
__cuda_callable__ StaticVector(); @@ -387,13 +399,16 @@ class StaticVector< 2, Real > : public 
StaticArray< 2, Real > /** \brief See StaticVector::StaticVector( const StaticVector< Size, Real >& v ).*/ __cuda_callable__ StaticVector( const StaticVector< 2, Real >& v ); - + bool setup( const Config::ParameterContainer& parameters, - const String& prefix = "" ); + const String& prefix = "" ); /** \brief See StaticVector::getType().*/ static String getType(); + template< typename StaticVectorOperationType > + StaticVector& operator = ( const StaticVectorOperationType& vo ); /**< \brief Assigns \e vo to this vector; the definition forwards to Algorithms::VectorAssignment. */ + /** \brief See StaticVector::operator += ( const StaticVector& v ).*/ __cuda_callable__ StaticVector& operator += ( const StaticVector& v ); @@ -408,7 +423,7 @@ class StaticVector< 2, Real > : public StaticArray< 2, Real > /** \brief See StaticVector::operator /= ( const Real& c ).*/ __cuda_callable__ - StaticVector& operator /= ( const Real& c ); + StaticVector& operator /= ( const Real& c ); /** \brief See StaticVector::operator + ( const StaticVector& u ) const.*/ __cuda_callable__ @@ -441,7 +456,7 @@ class StaticVector< 2, Real > : public StaticArray< 2, Real > /** \brief See StaticVector::operator >=.*/ __cuda_callable__ bool operator >= ( const StaticVector& v ) const; - + template< typename OtherReal > __cuda_callable__ operator StaticVector< 2, OtherReal >() const; @@ -452,7 +467,7 @@ class StaticVector< 2, Real > : public StaticArray< 2, Real > /** \brief See StaticVector::lpNorm( const Real& p ) const.*/ __cuda_callable__ - Real lpNorm( const Real& p ) const; + Real lpNorm( const Real& p ) const; #ifdef HAVE_MIC __cuda_callable__ @@ -482,6 +497,8 @@ class StaticVector< 3, Real > : public StaticArray< 3, Real > typedef Real RealType; enum { size = 3 }; + using StaticArray< 3, Real >::getSize; + /** \brief See StaticVector::StaticVector().*/ __cuda_callable__ StaticVector(); @@ -510,13 +527,16 @@ class StaticVector< 3, Real > : public StaticArray< 3, Real > /** \brief See 
StaticVector::StaticVector( const StaticVector< Size, Real >& v ).*/ __cuda_callable__ StaticVector( const 
StaticVector< 3, Real >& v ); - + bool setup( const Config::ParameterContainer& parameters, - const String& prefix = "" ); + const String& prefix = "" ); /** \brief See StaticVector::getType().*/ static String getType(); + template< typename StaticVectorOperationType > + StaticVector& operator = ( const StaticVectorOperationType& vo ); /**< \brief Assigns \e vo to this vector; the definition forwards to Algorithms::VectorAssignment. */ + /** \brief See StaticVector::operator += ( const StaticVector& v ).*/ __cuda_callable__ StaticVector& operator += ( const StaticVector& v ); @@ -528,11 +548,11 @@ class StaticVector< 3, Real > : public StaticArray< 3, Real > /** \brief See StaticVector::operator *= ( const Real& c ).*/ __cuda_callable__ StaticVector& operator *= ( const Real& c ); - + /** \brief See StaticVector::operator /= ( const Real& c ).*/ __cuda_callable__ StaticVector& operator /= ( const Real& c ); - + /** \brief See StaticVector::operator + ( const StaticVector& u ) const.*/ __cuda_callable__ StaticVector operator + ( const StaticVector& u ) const; @@ -575,7 +595,7 @@ class StaticVector< 3, Real > : public StaticArray< 3, Real > /** \brief See StaticVector::lpNorm( const Real& p ) const.*/ __cuda_callable__ - Real lpNorm( const Real& p ) const; + Real lpNorm( const Real& p ) const; #ifdef HAVE_MIC __cuda_callable__ @@ -613,7 +633,7 @@ StaticVector< Size, Real > abs( const StaticVector< Size, Real >& u ) { return u namespace TNL { -namespace Containers { +namespace Containers { // TODO: move to some other source file template< typename Real > diff --git a/src/TNL/Containers/StaticVector1D_impl.h b/src/TNL/Containers/StaticVector1D_impl.h index 9eb2d90c0..48a1cba89 100644 --- a/src/TNL/Containers/StaticVector1D_impl.h +++ b/src/TNL/Containers/StaticVector1D_impl.h @@ -8,12 +8,14 @@ /* See Copyright Notice in tnl/Copyright */ -#pragma once +#pragma once #include +#include +#include namespace TNL { -namespace Containers { +namespace Containers { template< typename Real > 
__cuda_callable__ @@ -62,6 +64,15 @@ String StaticVector< 1, Real >::getType() String( " 
>" ); } +template< typename Real > + template< typename StaticVector_ > +StaticVector< 1, Real >& +StaticVector< 1, Real >::operator =( const StaticVector_& v ) +{ + Algorithms::VectorAssignment< StaticVector< 1, Real >, StaticVector_ >::assign( *this, v ); + return *this; /* without this the function flows off its end, which is undefined behavior */ +} + template< typename Real > __cuda_callable__ StaticVector< 1, Real >& StaticVector< 1, Real >::operator += ( const StaticVector& v ) diff --git a/src/TNL/Containers/StaticVector2D_impl.h b/src/TNL/Containers/StaticVector2D_impl.h index 0a8cb3ec0..51a40ef23 100644 --- a/src/TNL/Containers/StaticVector2D_impl.h +++ b/src/TNL/Containers/StaticVector2D_impl.h @@ -8,12 +8,13 @@ /* See Copyright Notice in tnl/Copyright */ -#pragma once +#pragma once #include +#include namespace TNL { -namespace Containers { +namespace Containers { template< typename Real > __cuda_callable__ @@ -70,6 +71,15 @@ String StaticVector< 2, Real >::getType() String( " >" ); } +template< typename Real > + template< typename StaticVector_ > +StaticVector< 2, Real >& +StaticVector< 2, Real >::operator =( const StaticVector_& v ) +{ + Algorithms::VectorAssignment< StaticVector< 2, Real >, StaticVector_ >::assign( *this, v ); + return *this; /* without this the function flows off its end, which is undefined behavior */ +} + template< typename Real > __cuda_callable__ StaticVector< 2, Real >& StaticVector< 2, Real >::operator += ( const StaticVector& v ) @@ -206,10 +216,10 @@ StaticVector< 2, Real >::lpNorm( const Real& p ) const if( p == 1.0 ) return TNL::abs( this->data[ 0 ] ) + TNL::abs( this->data[ 1 ] ); if( p == 2.0 ) - return TNL::sqrt( this->data[ 0 ] * this->data[ 0 ] + + return TNL::sqrt( this->data[ 0 ] * this->data[ 0 ] + this->data[ 1 ] * this->data[ 1 ] ); return TNL::pow( TNL::pow( TNL::abs( this->data[ 0 ] ), p ) + - TNL::pow( TNL::abs( this->data[ 1 ] ), p ), 1.0 / p ); + TNL::pow( TNL::abs( this->data[ 1 ] ), p ), 1.0 / p ); } } // namespace 
Containers diff --git a/src/TNL/Containers/StaticVector3D_impl.h b/src/TNL/Containers/StaticVector3D_impl.h index 77f1893f6..95cbe51e2 100644 --- 
a/src/TNL/Containers/StaticVector3D_impl.h +++ b/src/TNL/Containers/StaticVector3D_impl.h @@ -11,9 +11,10 @@ #pragma once #include +#include namespace TNL { -namespace Containers { +namespace Containers { template< typename Real > __cuda_callable__ @@ -71,6 +72,15 @@ String StaticVector< 3, Real >::getType() String( " >" ); } +template< typename Real > + template< typename StaticVector_ > +StaticVector< 3, Real >& +StaticVector< 3, Real >::operator =(const StaticVector_& v) +{ + Algorithms::VectorAssignment< StaticVector< 3, Real >, StaticVector_ >::assign( *this, v ); + return *this; /* without this the function flows off its end, which is undefined behavior */ +} + template< typename Real > __cuda_callable__ StaticVector< 3, Real >& StaticVector< 3, Real >::operator += ( const StaticVector& v ) @@ -219,16 +229,16 @@ Real StaticVector< 3, Real >::lpNorm( const Real& p ) const { if( p == 1.0 ) - return TNL::abs( this->data[ 0 ] ) + - TNL::abs( this->data[ 1 ] ) + + return TNL::abs( this->data[ 0 ] ) + + TNL::abs( this->data[ 1 ] ) + TNL::abs( this->data[ 2 ] ); if( p == 2.0 ) - return TNL::sqrt( this->data[ 0 ] * this->data[ 0 ] + + return TNL::sqrt( this->data[ 0 ] * this->data[ 0 ] + this->data[ 1 ] * this->data[ 1 ] + this->data[ 2 ] * this->data[ 2 ] ); return TNL::pow( TNL::pow( TNL::abs( this->data[ 0 ] ), p ) + TNL::pow( TNL::abs( this->data[ 1 ] ), p ) + - TNL::pow( TNL::abs( this->data[ 2 ] ), p ), 1.0 / p ); + TNL::pow( TNL::abs( this->data[ 2 ] ), p ), 1.0 / p ); } } // namespace Containers diff --git a/src/TNL/Containers/StaticVector_impl.h b/src/TNL/Containers/StaticVector_impl.h index 09fbcc34d..53530eeb1 100644 --- a/src/TNL/Containers/StaticVector_impl.h +++ b/src/TNL/Containers/StaticVector_impl.h @@ -11,9 +11,11 @@ #pragma once #include +#include +#include namespace TNL { -namespace Containers { +namespace Containers { template< int Size, typename Real > __cuda_callable__ @@ -64,6 +66,15 @@ String StaticVector< Size, Real >::getType() String( " >" ); } 
+template< int Size, typename Real > + template< typename StaticVector_ > +StaticVector< 
Size, Real >& +StaticVector< Size, Real >::operator =( const StaticVector_& v ) +{ + Algorithms::VectorAssignment< StaticVector< Size, Real >, StaticVector_ >::assign( *this, v ); + return *this; /* without this the function flows off its end, which is undefined behavior */ +} + template< int Size, typename Real > __cuda_callable__ StaticVector< Size, Real >& StaticVector< Size, Real >::operator += ( const StaticVector& v ) diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index 1ae79565b..e6294f242 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -73,6 +73,11 @@ public: static String getType(); + template< typename VectorOperationType > + void evaluate( const VectorOperationType& vo ); /**< \brief Stores vo[ i ] into element i for all i in [ 0, getSize() ) using ParallelFor on DeviceType. */ + + template< typename VectorOperationType > + void evaluateFor( const VectorOperationType& vo ); /**< \brief Plain-loop variant of evaluate(); only the Devices::Host branch is implemented. */ // All other Vector methods follow... 
void addElement( IndexType i, RealType value ); diff --git a/src/TNL/Containers/VectorView_impl.h b/src/TNL/Containers/VectorView_impl.h index af4df8537..02b95397d 100644 --- a/src/TNL/Containers/VectorView_impl.h +++ b/src/TNL/Containers/VectorView_impl.h @@ -50,6 +50,61 @@ getType() } +template< typename Real, + typename Device, + typename Index > +template< typename VectorOperationType > +void +VectorView< Real, Device, Index >:: +evaluate( const VectorOperationType& vo ) +{ + Real* dt = this->data; + auto assign = [=] __cuda_callable__ ( Index i ) + { + dt[ i ] = vo[ i ]; + }; + ParallelFor< DeviceType >::exec( 0, this->getSize(), assign ); +} + +template< typename Real, + typename Device, + typename Index > +template< typename VectorOperationType > +void +VectorView< Real, Device, Index >:: +evaluateFor( const VectorOperationType& vo ) +{ + if( std::is_same< DeviceType, Devices::Host >::value ) + { + for( Index i = 0; i < this->getSize(); i++ ) + { + this->data[ i ] = vo[ i ]; + } + } + /* + if( std::is_same< DeviceType, Devices::Cuda >::value ) + { + Real* dt; + VectorOperationType* expression; + + cudaMallocManaged(&dt, this->getSize * sizeof(Real)); + 
cudaMallocManaged(&expression, this->getSize * sizeof(Real)); + + dt = this->data; + expression = vo; + + expressionTemplatesKernel<<<(this->getSize()+255)/256, 256>>>( dt, this->getSize(), expression ); + TNL_CHECK_CUDA_DEVICE; + //cudaDeviceSynchronize(); + + //error check + + cudaFree(dt); + cudaFree(expression); + } + */ +} + template< typename Real, typename Device, typename Index > diff --git a/src/UnitTests/CMakeLists.txt b/src/UnitTests/CMakeLists.txt index 95cca1efb..7c2d5582b 100644 --- a/src/UnitTests/CMakeLists.txt +++ b/src/UnitTests/CMakeLists.txt @@ -13,6 +13,14 @@ if( BUILD_CUDA ) TARGET_LINK_LIBRARIES( AssertCudaTest ${GTEST_BOTH_LIBRARIES} ) endif() +ADD_EXECUTABLE( ExpressionTemplatesStaticTest ExpressionTemplatesStaticTest.cpp ) +TARGET_COMPILE_OPTIONS( ExpressionTemplatesStaticTest PRIVATE ${CXX_TESTS_FLAGS} ) +TARGET_LINK_LIBRARIES( ExpressionTemplatesStaticTest ${GTEST_BOTH_LIBRARIES} ) + +ADD_EXECUTABLE( ExpressionTemplatesDynamicTest ExpressionTemplatesDynamicTest.cpp ) +TARGET_COMPILE_OPTIONS( ExpressionTemplatesDynamicTest PRIVATE ${CXX_TESTS_FLAGS} ) +TARGET_LINK_LIBRARIES( ExpressionTemplatesDynamicTest ${GTEST_BOTH_LIBRARIES} ) + if( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( FileTest FileTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( FileTest ${GTEST_BOTH_LIBRARIES} ) diff --git a/src/UnitTests/ExpressionTemplatesDynamicTest.cpp b/src/UnitTests/ExpressionTemplatesDynamicTest.cpp new file mode 100644 index 000000000..35147591a --- /dev/null +++ b/src/UnitTests/ExpressionTemplatesDynamicTest.cpp @@ -0,0 +1,194 @@ +/*************************************************************************** + ExpressionTemplatesDynamicTest.cpp - description + ------------------- + begin : Mar 27, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +// Implemented by Vojtech Legler + +#ifdef 
HAVE_GTEST +#include +#endif + +#include +#include + +using namespace TNL; +using namespace TNL::Containers; + +#ifdef HAVE_GTEST +TEST( ExpressionTemplatesDynamicTest, Addition ) +{ + Vector< double, Devices::Host, int > d1{ 1, 1.5, 9, 54, 300.4, 6 }; + Vector< double, Devices::Host, int > d2{ 1.5, 1.5, 50, 30.4, 8, 600 }; + Vector< double, Devices::Host, int > dr1( 6 ); + VectorView< double, Devices::Host, int > dv1( d1 ); + VectorView< double, Devices::Host, int > dv2( d2 ); + VectorView< double, Devices::Host, int > dvr1( dr1 ); + dvr1.evaluate( dv1 + dv2 + dv2 + dv1 ); + double temp; + for( int i = 0; i < 6; i++){ + temp = dv1[ i ] + dv2[ i ] + dv2[ i ] + dv1[ i ]; + EXPECT_DOUBLE_EQ( dvr1[ i ], temp ); + } +} + +TEST( ExpressionTemplatesDynamicTest, Subtraction ) +{ + Vector< double, Devices::Host, int > d1{ 1, 1.5, 9, 54, 300.4, 6 }; + Vector< double, Devices::Host, int > d2{ 1.5, 1.5, 50, 30.4, 8, 600 }; + Vector< double, Devices::Host, int > dr1( 6 ); + VectorView< double, Devices::Host, int > dv1( d1 ); + VectorView< double, Devices::Host, int > dv2( d2 ); + VectorView< double, Devices::Host, int > dvr1( dr1 ); + dvr1.evaluate( dv2 - dv1 - dv1 ); + double temp; + for( int i = 0; i < 6; i++){ + temp = dv2[ i ] - dv1[ i ] - dv1[ i ]; + EXPECT_DOUBLE_EQ( dvr1[ i ], temp ); + } +} + + +TEST( ExpressionTemplatesDynamicTest, MultiplicationLeftSide ) +{ + Vector< double, Devices::Host, int > d1{ 1, 1.5, 9, 54, 300.4, 6 }; + Vector< double, Devices::Host, int > dr1( 6 ); + VectorView< double, Devices::Host, int > dv1( d1 ); + VectorView< double, Devices::Host, int > dvr1( dr1 ); + dvr1.evaluate( 5*dv1 ); + double temp; + for( int i = 0; i < 6; i++){ + temp = 5 * dv1[ i ]; + EXPECT_DOUBLE_EQ( dvr1[ i ], temp ); + } +} + +TEST( ExpressionTemplatesDynamicTest, ExponentialFunction ) +{ + Vector< double, Devices::Host, int > d1{ 1, 1.5, 0, 54, 300.4, 6 }; + Vector< double, Devices::Host, int > dr1( 6 ); + VectorView< double, Devices::Host, int > dv1( d1 ); + VectorView< 
double, 
Devices::Host, int > dvr1( dr1 ); + dvr1.evaluate( exp(dv1) ); + double temp; + for( int i = 0; i < 6; i++){ + temp = std::exp( dv1[ i ] ); + EXPECT_DOUBLE_EQ( dvr1[ i ], temp ); + } +} + +TEST( ExpressionTemplatesDynamicTest, NaturalLogarithm ) +{ + Vector< double, Devices::Host, int > d1{ 1, 1.5, 1, 54, 300.4, 6 }; + Vector< double, Devices::Host, int > dr1( 6 ); + VectorView< double, Devices::Host, int > dv1( d1 ); + VectorView< double, Devices::Host, int > dvr1( dr1 ); + dvr1.evaluate( log(dv1) ); + double temp; + for( int i = 0; i < 6; i++){ + temp = std::log( dv1[ i ] ); + EXPECT_DOUBLE_EQ( dvr1[ i ], temp ); + } +} + +TEST( ExpressionTemplatesDynamicTest, Sine ) +{ + Vector< double, Devices::Host, int > d1{ 1, 1.5, 0, 54, 300.4, 6 }; + Vector< double, Devices::Host, int > dr1( 6 ); + VectorView< double, Devices::Host, int > dv1( d1 ); + VectorView< double, Devices::Host, int > dvr1( dr1 ); + dvr1.evaluate( sin(dv1) ); + double temp; + for( int i = 0; i < 6; i++){ + temp = std::sin( dv1[ i ] ); + EXPECT_DOUBLE_EQ( dvr1[ i ], temp ); + } +} + +TEST( ExpressionTemplatesDynamicTest, Cosine ) +{ + Vector< double, Devices::Host, int > d1{ 1, 1.5, 0, 54, 300.4, 6 }; + Vector< double, Devices::Host, int > dr1( 6 ); + VectorView< double, Devices::Host, int > dv1( d1 ); + VectorView< double, Devices::Host, int > dvr1( dr1 ); + dvr1.evaluate( cos(dv1) ); + double temp; + for( int i = 0; i < 6; i++){ + temp = std::cos( dv1[ i ] ); + EXPECT_DOUBLE_EQ( dvr1[ i ], temp ); + } +} + +TEST( ExpressionTemplatesDynamicTest, Tangent ) +{ + Vector< double, Devices::Host, int > d1{ 1, 1.5, 0, 54, 300.4, 6 }; + Vector< double, Devices::Host, int > dr1( 6 ); + VectorView< double, Devices::Host, int > dv1( d1 ); + VectorView< double, Devices::Host, int > dvr1( dr1 ); + dvr1.evaluate( tan(dv1) ); + double temp; + for( int i = 0; i < 6; i++){ + temp = std::tan( dv1[ i ] ); + EXPECT_DOUBLE_EQ( dvr1[ i ], temp ); + } +} + +TEST( ExpressionTemplatesDynamicTest, ArcSine ) +{ + Vector< double, 
Devices::Host, int > d1{ 1, 
-0.5, 0, 0.35, -0.4, -1 }; + Vector< double, Devices::Host, int > dr1( 6 ); + VectorView< double, Devices::Host, int > dv1( d1 ); + VectorView< double, Devices::Host, int > dvr1( dr1 ); + dvr1.evaluate( asin(dv1) ); + double temp; + for( int i = 0; i < 6; i++){ + temp = std::asin( dv1[ i ] ); + EXPECT_DOUBLE_EQ( dvr1[ i ], temp ); + } +} + +TEST( ExpressionTemplatesDynamicTest, ArcCosine ) +{ + Vector< double, Devices::Host, int > d1{ 1, -0.5, 0, 0.35, -0.4, -1 }; + Vector< double, Devices::Host, int > dr1( 6 ); + VectorView< double, Devices::Host, int > dv1( d1 ); + VectorView< double, Devices::Host, int > dvr1( dr1 ); + dvr1.evaluate( acos(dv1) ); + double temp; + for( int i = 0; i < 6; i++){ + temp = std::acos( dv1[ i ] ); + EXPECT_DOUBLE_EQ( dvr1[ i ], temp ); + } +} + +TEST( ExpressionTemplatesDynamicTest, ArcTangent ) +{ + Vector< double, Devices::Host, int > d1{ 1, 1.5, 0, 54, 300.4, 6 }; + Vector< double, Devices::Host, int > dr1( 6 ); + VectorView< double, Devices::Host, int > dv1( d1 ); + VectorView< double, Devices::Host, int > dvr1( dr1 ); + dvr1.evaluate( atan(dv1) ); + double temp; + for( int i = 0; i < 6; i++){ + temp = std::atan( dv1[ i ] ); + EXPECT_DOUBLE_EQ( dvr1[ i ], temp ); + } +} + +#endif + +#include "GtestMissingError.h" +int main( int argc, char* argv[] ) +{ +#ifdef HAVE_GTEST + ::testing::InitGoogleTest( &argc, argv ); + return RUN_ALL_TESTS(); +#else + throw GtestMissingError(); +#endif +} diff --git a/src/UnitTests/ExpressionTemplatesStaticTest.cpp b/src/UnitTests/ExpressionTemplatesStaticTest.cpp new file mode 100644 index 000000000..5373c1eeb --- /dev/null +++ b/src/UnitTests/ExpressionTemplatesStaticTest.cpp @@ -0,0 +1,181 @@ +/*************************************************************************** + ExpressionTemplatesStaticTest.cpp - description + ------------------- + begin : Mar 05, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + 
***************************************************************************/ + +/* 
See Copyright Notice in tnl/Copyright */ + +// Implemented by Vojtech Legler + +#ifdef HAVE_GTEST +#include +#endif + +#include + +using namespace TNL; +using namespace TNL::Containers; + +#ifdef HAVE_GTEST +TEST( ExpressionTemplatesStaticTest, Addition ) +{ + StaticVector< 6, double > sv1{ 1, 1.5, 9, 54, 300.4, 6 }; + StaticVector< 6, double > sv2{ 1.5, 1.5, 50, 30.4, 8, 600 }; + StaticVector< 6, double > svr1{}; + svr1 = sv1 + sv2 + sv2 + sv1; + double temp; + for( int i = 0; i < 6; i++){ + temp = sv1[ i ] + sv2[ i ] + sv2[ i ] + sv1[ i ]; + EXPECT_EQ( svr1[ i ], temp ); + } +} + +/*TEST( ExpressionTemplatesStaticTest, Subtraction ) +{ + StaticVector< 6, double > sv1{ 1, 1.5, 9, 54, 300.4, 6 }; + StaticVector< 6, double > sv2{ 1.5, 1.5, 50, 30.4, 8, 600 }; + StaticVector< 6, double > svr1{}; + svr1 = sv2 - sv1 - sv1; + double temp; + for( int i = 0; i < 6; i++){ + temp = sv2[ i ] - sv1[ i ] - sv1[ i ]; + EXPECT_EQ( svr1[ i ], temp ); + } +} + + +TEST( ExpressionTemplatesStaticTest, MultiplicationLeftSide ) +{ + StaticVector< 6, double > sv1{ 1, 1.5, 9, 54, 300.4, 6 }; + StaticVector< 6, double > svr1{}; + svr1 = 5 * sv1; + double temp; + for( int i = 0; i < 6; i++){ + temp = 5 * sv1[ i ]; + EXPECT_EQ( svr1[ i ], temp ); + } +} + +TEST( ExpressionTemplatesStaticTest, AbsoluteValue ) +{ + StaticVector< 6, double > sv1{ 1, 1.5, 9, 54, -300.4, 6 }; + StaticVector< 6, double > svr1{}; + svr1 = abs(sv1); + double temp; + for( int i = 0; i < 6; i++){ + temp = std::abs( sv1[ i ] ); + EXPECT_EQ( svr1[ i ], temp ); + } +} + +TEST( ExpressionTemplatesStaticTest, ExponentialFunction ) +{ + StaticVector< 6, double > sv1{ 1, 1.5, 0, 54, 300.4, 6 }; + StaticVector< 6, double > svr1{}; + svr1 = exp(sv1); + double temp; + for( int i = 0; i < 6; i++){ + temp = std::exp( sv1[ i ] ); + EXPECT_EQ( svr1[ i ], temp ); + } +} + +TEST( ExpressionTemplatesStaticTest, NaturalLogarithm ) +{ + StaticVector< 6, double > sv1{ 1, 1.5, 1, 54, 300.4, 6 }; + StaticVector< 6, double > svr1{}; + 
svr1 = log(sv1); + double temp; + for( int i = 0; i < 6; i++){ + temp = std::log( sv1[ i ] ); + EXPECT_EQ( svr1[ i ], temp ); + } +} + +TEST( ExpressionTemplatesStaticTest, Sine ) +{ + StaticVector< 6, double > sv1{ 1, 1.5, 0, 54, 300.4, 6 }; + StaticVector< 6, double > svr1{}; + svr1 = sin(sv1); + double temp; + for( int i = 0; i < 6; i++){ + temp = std::sin( sv1[ i ] ); + EXPECT_EQ( svr1[ i ], temp ); + } +} + +TEST( ExpressionTemplatesStaticTest, Cosine ) +{ + StaticVector< 6, double > sv1{ 1, 1.5, 0, 54, 300.4, 6 }; + StaticVector< 6, double > svr1{}; + svr1 = cos(sv1); + double temp; + for( int i = 0; i < 6; i++){ + temp = std::cos( sv1[ i ] ); + EXPECT_EQ( svr1[ i ], temp ); + } +} + +TEST( ExpressionTemplatesStaticTest, Tangent ) +{ + StaticVector< 6, double > sv1{ 1, 1.5, 0, 54, 300.4, 6 }; + StaticVector< 6, double > svr1{}; + svr1 = tan(sv1); + double temp; + for( int i = 0; i < 6; i++){ + temp = std::tan( sv1[ i ] ); + EXPECT_EQ( svr1[ i ], temp ); + } +} + +TEST( ExpressionTemplatesStaticTest, ArcSine ) +{ + StaticVector< 6, double > sv1{ 1, -0.5, 0, 0.35, -0.4, -1 }; + StaticVector< 6, double > svr1{}; + svr1 = asin(sv1); + double temp; + for( int i = 0; i < 6; i++){ + temp = std::asin( sv1[ i ] ); + EXPECT_EQ( svr1[ i ], temp ); + } +} + +TEST( ExpressionTemplatesStaticTest, ArcCosine ) +{ + StaticVector< 6, double > sv1{ 1, -0.5, 0, 0.35, -0.4, -1 }; + StaticVector< 6, double > svr1{}; + svr1 = acos(sv1); + double temp; + for( int i = 0; i < 6; i++){ + temp = std::acos( sv1[ i ] ); + EXPECT_EQ( svr1[ i ], temp ); + } +} + +TEST( ExpressionTemplatesStaticTest, ArcTangent ) +{ + StaticVector< 6, double > sv1{ 1, 1.5, 0, 54, 300.4, 6 }; + StaticVector< 6, double > svr1{}; + svr1 = atan(sv1); + double temp; + for( int i = 0; i < 6; i++){ + temp = std::atan( sv1[ i ] ); + EXPECT_EQ( svr1[ i ], temp ); + } +}*/ + +#endif + +#include "GtestMissingError.h" +int main( int argc, char* argv[] ) +{ +#ifdef HAVE_GTEST + ::testing::InitGoogleTest( &argc, argv ); + 
return RUN_ALL_TESTS(); +#else + throw GtestMissingError(); +#endif +} -- GitLab From 2502ab31814e68732b4d130748aa747d0d3b0058 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 18 Apr 2019 14:24:59 +0200 Subject: [PATCH 02/93] Binary expression templates are working. --- src/TNL/Containers/StaticArray.h | 8 ++++ src/TNL/Containers/StaticArray1D_impl.h | 8 ++++ src/TNL/Containers/StaticArray2D_impl.h | 8 ++++ src/TNL/Containers/StaticArray3D_impl.h | 8 ++++ src/TNL/Containers/StaticArray_impl.h | 8 ++++ src/TNL/Containers/StaticVector.h | 19 ++++++++- src/TNL/Containers/StaticVector1D_impl.h | 9 ++++ src/TNL/Containers/StaticVector2D_impl.h | 10 +++++ src/TNL/Containers/StaticVector3D_impl.h | 10 +++++ src/TNL/Containers/StaticVector_impl.h | 12 +++++- .../ExpressionTemplatesStaticTest.cpp | 41 ++++++++++++++----- 11 files changed, 129 insertions(+), 12 deletions(-) diff --git a/src/TNL/Containers/StaticArray.h b/src/TNL/Containers/StaticArray.h index 37392be78..6327b2783 100644 --- a/src/TNL/Containers/StaticArray.h +++ b/src/TNL/Containers/StaticArray.h @@ -73,6 +73,8 @@ class StaticArray __cuda_callable__ inline StaticArray( const StaticArray< Size, Value >& v ); + inline StaticArray( const std::initializer_list< Value > &elems ); + /** * \brief Gets type of this array. 
*/ @@ -214,6 +216,8 @@ class StaticArray< 1, Value > __cuda_callable__ inline StaticArray( const StaticArray< size, Value >& v ); + inline StaticArray( const std::initializer_list< Value > &elems ); + /** \brief See StaticArray::getType().*/ static String getType(); @@ -330,6 +334,8 @@ class StaticArray< 2, Value > __cuda_callable__ inline StaticArray( const StaticArray< size, Value >& v ); + inline StaticArray( const std::initializer_list< Value > &elems ); + /** \brief See StaticArray::getType().*/ static String getType(); @@ -454,6 +460,8 @@ class StaticArray< 3, Value > __cuda_callable__ inline StaticArray( const StaticArray< size, Value >& v ); + StaticArray( const std::initializer_list< Value > &elems ); + /** \brief See StaticArray::getType().*/ static String getType(); diff --git a/src/TNL/Containers/StaticArray1D_impl.h b/src/TNL/Containers/StaticArray1D_impl.h index 403ddb8aa..90de7ac8d 100644 --- a/src/TNL/Containers/StaticArray1D_impl.h +++ b/src/TNL/Containers/StaticArray1D_impl.h @@ -51,6 +51,14 @@ inline StaticArray< 1, Value >::StaticArray( const StaticArray< size, Value >& v data[ 0 ] = v[ 0 ]; } +template< typename Value > +StaticArray< 1, Value >::StaticArray( const std::initializer_list< Value > &elems) +{ + auto it = elems.begin(); + for( int i = 0; i < getSize(); i++ ) + data[ i ] = ( it != elems.end() ) ? *it++ : Value(); /* a list shorter than the array must not be read past its end; the tail is value-initialized, surplus list elements are ignored */ +} + template< typename Value > String StaticArray< 1, Value >::getType() { diff --git a/src/TNL/Containers/StaticArray2D_impl.h b/src/TNL/Containers/StaticArray2D_impl.h index da1d944bc..4cddb0b65 100644 --- a/src/TNL/Containers/StaticArray2D_impl.h +++ b/src/TNL/Containers/StaticArray2D_impl.h @@ -63,6 +63,14 @@ inline StaticArray< 2, Value >::StaticArray( const StaticArray< size, Value >& v data[ 1 ] = v[ 1 ]; } +template< typename Value > +StaticArray< 2, Value >::StaticArray( const std::initializer_list< Value > &elems) +{ + auto it = elems.begin(); + for( int i = 0; i < getSize(); i++ ) + data[ i ] = ( it != elems.end() ) ? *it++ : Value(); /* a list shorter than the array must not be read past its end; the tail is value-initialized, surplus list elements are ignored */ +} + template< typename Value > String
StaticArray< 2, Value >::getType() { diff --git a/src/TNL/Containers/StaticArray3D_impl.h b/src/TNL/Containers/StaticArray3D_impl.h index 3e3ae820e..12a44ad32 100644 --- a/src/TNL/Containers/StaticArray3D_impl.h +++ b/src/TNL/Containers/StaticArray3D_impl.h @@ -67,6 +67,14 @@ inline StaticArray< 3, Value >::StaticArray( const StaticArray< size, Value >& v data[ 2 ] = v[ 2 ]; } +template< typename Value > +StaticArray< 3, Value >::StaticArray( const std::initializer_list< Value > &elems) +{ + auto it = elems.begin(); + for( int i = 0; i < getSize(); i++ ) + data[ i ] = ( it != elems.end() ) ? *it++ : Value(); /* a list shorter than the array must not be read past its end; the tail is value-initialized, surplus list elements are ignored */ +} + template< typename Value > String StaticArray< 3, Value >::getType() { diff --git a/src/TNL/Containers/StaticArray_impl.h b/src/TNL/Containers/StaticArray_impl.h index 4ff5d6b8f..cc357b2be 100644 --- a/src/TNL/Containers/StaticArray_impl.h +++ b/src/TNL/Containers/StaticArray_impl.h @@ -55,6 +55,14 @@ inline StaticArray< Size, Value >::StaticArray( const StaticArray< Size, Value > data[ i ] = v[ i ]; } +template< int Size, typename Value > +StaticArray< Size, Value >::StaticArray( const std::initializer_list< Value > &elems) +{ + auto it = elems.begin(); + for( int i = 0; i < getSize(); i++ ) + data[ i ] = ( it != elems.end() ) ? *it++ : Value(); /* a list shorter than the array must not be read past its end; the tail is value-initialized, surplus list elements are ignored */ +} + template< int Size, typename Value > String StaticArray< Size, Value >::getType() { diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h index ac0e9fe57..a21f34737 100644 --- a/src/TNL/Containers/StaticVector.h +++ b/src/TNL/Containers/StaticVector.h @@ -68,6 +68,8 @@ class StaticVector : public StaticArray< Size, Real > __cuda_callable__ StaticVector( const StaticVector< Size, Real >& v ); + StaticVector( const std::initializer_list< Real > &elems ); + /** * \brief Sets up a new (vector) parameter which means it can have more elements. * @@ -125,6 +127,7 @@ class StaticVector : public StaticArray< Size, Real > __cuda_callable__ StaticVector& operator /= ( const Real& c ); +#ifdef UNDEF /** * \brief Addition operator.
* @@ -164,6 +167,7 @@ class StaticVector : public StaticArray< Size, Real > */ __cuda_callable__ Real operator * ( const StaticVector& u ) const; +#endif /** * \brief Compares this static vector with static vector \e v. @@ -273,6 +277,8 @@ class StaticVector< 1, Real > : public StaticArray< 1, Real > __cuda_callable__ StaticVector( const StaticVector< 1, Real >& v ); + StaticVector( const std::initializer_list< Real > &elems ); + bool setup( const Config::ParameterContainer& parameters, const String& prefix = "" ); @@ -298,6 +304,7 @@ class StaticVector< 1, Real > : public StaticArray< 1, Real > __cuda_callable__ StaticVector& operator /= ( const Real& c ); +#ifdef UNDEF /** \brief See StaticVector::operator + ( const StaticVector& u ) const.*/ __cuda_callable__ StaticVector operator + ( const StaticVector& u ) const; @@ -313,6 +320,7 @@ class StaticVector< 1, Real > : public StaticArray< 1, Real > /** \brief See StaticVector::operator * ( const StaticVector& u ) const.*/ __cuda_callable__ Real operator * ( const StaticVector& u ) const; +#endif /** \brief See StaticVector::operator <.*/ __cuda_callable__ @@ -400,6 +408,8 @@ class StaticVector< 2, Real > : public StaticArray< 2, Real > __cuda_callable__ StaticVector( const StaticVector< 2, Real >& v ); + StaticVector( const std::initializer_list< Real > &elems ); + bool setup( const Config::ParameterContainer& parameters, const String& prefix = "" ); @@ -425,6 +435,7 @@ class StaticVector< 2, Real > : public StaticArray< 2, Real > __cuda_callable__ StaticVector& operator /= ( const Real& c ); +#ifdef UNDEF /** \brief See StaticVector::operator + ( const StaticVector& u ) const.*/ __cuda_callable__ StaticVector operator + ( const StaticVector& u ) const; @@ -440,6 +451,7 @@ class StaticVector< 2, Real > : public StaticArray< 2, Real > /** \brief See StaticVector::operator * ( const StaticVector& u ) const.*/ __cuda_callable__ Real operator * ( const StaticVector& u ) const; +#endif /** \brief See 
StaticVector::operator <.*/ __cuda_callable__ @@ -528,6 +540,8 @@ class StaticVector< 3, Real > : public StaticArray< 3, Real > __cuda_callable__ StaticVector( const StaticVector< 3, Real >& v ); + StaticVector( const std::initializer_list< Real > &elems ); + bool setup( const Config::ParameterContainer& parameters, const String& prefix = "" ); @@ -553,6 +567,7 @@ class StaticVector< 3, Real > : public StaticArray< 3, Real > __cuda_callable__ StaticVector& operator /= ( const Real& c ); +#ifdef UNDEF /** \brief See StaticVector::operator + ( const StaticVector& u ) const.*/ __cuda_callable__ StaticVector operator + ( const StaticVector& u ) const; @@ -568,6 +583,7 @@ class StaticVector< 3, Real > : public StaticArray< 3, Real > /** \brief See StaticVector::operator * ( const StaticVector& u ) const.*/ __cuda_callable__ Real operator * ( const StaticVector& u ) const; +#endif /** \brief See StaticVector::operator <.*/ __cuda_callable__ @@ -614,10 +630,11 @@ class StaticVector< 3, Real > : public StaticArray< 3, Real > } #endif }; - +/* template< int Size, typename Real, typename Scalar > __cuda_callable__ StaticVector< Size, Real > operator * ( const Scalar& c, const StaticVector< Size, Real >& u ); + */ template< int Size, typename Real > __cuda_callable__ diff --git a/src/TNL/Containers/StaticVector1D_impl.h b/src/TNL/Containers/StaticVector1D_impl.h index 48a1cba89..0349bf0e1 100644 --- a/src/TNL/Containers/StaticVector1D_impl.h +++ b/src/TNL/Containers/StaticVector1D_impl.h @@ -45,6 +45,12 @@ StaticVector< 1, Real >::StaticVector( const StaticVector< 1, Real >& v ) { } +template< typename Real > +StaticVector< 1, Real >::StaticVector( const std::initializer_list< Real > &elems ) +: StaticArray< 1, Real >( elems ) +{ +} + template< typename Real > bool StaticVector< 1, Real >::setup( const Config::ParameterContainer& parameters, @@ -70,6 +76,7 @@ StaticVector< 1, Real >& StaticVector< 1, Real >::operator =( const StaticVector_& v ) { Algorithms::VectorAssignment< 
StaticVector< 1, Real >, StaticVector_ >::assign( *this, v ); + return *this; } template< typename Real > @@ -104,6 +111,7 @@ StaticVector< 1, Real >& StaticVector< 1, Real >::operator /= ( const Real& c ) return *this; } +#ifdef UNDEF template< typename Real > __cuda_callable__ StaticVector< 1, Real > StaticVector< 1, Real >::operator + ( const StaticVector& u ) const @@ -137,6 +145,7 @@ Real StaticVector< 1, Real >::operator * ( const StaticVector& u ) const { return this->data[ 0 ] * u[ 0 ]; } +#endif template< typename Real > __cuda_callable__ diff --git a/src/TNL/Containers/StaticVector2D_impl.h b/src/TNL/Containers/StaticVector2D_impl.h index 51a40ef23..d23caba03 100644 --- a/src/TNL/Containers/StaticVector2D_impl.h +++ b/src/TNL/Containers/StaticVector2D_impl.h @@ -51,6 +51,12 @@ StaticVector< 2, Real >::StaticVector( const StaticVector< 2, Real >& v ) { } +template< typename Real > +StaticVector< 2, Real >::StaticVector( const std::initializer_list< Real > &elems ) +: StaticArray< 2, Real >( elems ) +{ +} + template< typename Real > bool StaticVector< 2, Real >::setup( const Config::ParameterContainer& parameters, @@ -77,6 +83,7 @@ StaticVector< 2, Real >& StaticVector< 2, Real >::operator =( const StaticVector_& v ) { Algorithms::VectorAssignment< StaticVector< 2, Real >, StaticVector_ >::assign( *this, v ); + return *this; } template< typename Real > @@ -116,6 +123,7 @@ StaticVector< 2, Real >& StaticVector< 2, Real >::operator /= ( const Real& c ) return *this; } +#ifdef UNDEF template< typename Real > __cuda_callable__ StaticVector< 2, Real > StaticVector< 2, Real >::operator + ( const StaticVector& u ) const @@ -153,6 +161,8 @@ Real StaticVector< 2, Real >::operator * ( const StaticVector& u ) const return this->data[ 0 ] * u[ 0 ] + this->data[ 1 ] * u[ 1 ]; } +#endif + template< typename Real > __cuda_callable__ diff --git a/src/TNL/Containers/StaticVector3D_impl.h b/src/TNL/Containers/StaticVector3D_impl.h index 95cbe51e2..97a272a97 100644 --- 
a/src/TNL/Containers/StaticVector3D_impl.h +++ b/src/TNL/Containers/StaticVector3D_impl.h @@ -51,6 +51,12 @@ StaticVector< 3, Real >::StaticVector( const StaticVector< 3, Real >& v ) { } +template< typename Real > +StaticVector< 3, Real >::StaticVector( const std::initializer_list< Real > &elems ) +: StaticArray< 3, Real >( elems ) +{ +} + template< typename Real > bool StaticVector< 3, Real >::setup( const Config::ParameterContainer& parameters, @@ -78,6 +84,7 @@ StaticVector< 3, Real >& StaticVector< 3, Real >::operator =(const StaticVector_& v) { Algorithms::VectorAssignment< StaticVector< 3, Real >, StaticVector_ >::assign( *this, v ); + return *this; } template< typename Real > @@ -121,6 +128,7 @@ StaticVector< 3, Real >& StaticVector< 3, Real >::operator /= ( const Real& c ) return *this; } +#ifdef UNDEF template< typename Real > __cuda_callable__ StaticVector< 3, Real > StaticVector< 3, Real >::operator + ( const StaticVector& u ) const @@ -162,6 +170,8 @@ Real StaticVector< 3, Real >::operator * ( const StaticVector& u ) const this->data[ 1 ] * u[ 1 ] + this->data[ 2 ] * u[ 2 ]; } +#endif + template< typename Real > __cuda_callable__ diff --git a/src/TNL/Containers/StaticVector_impl.h b/src/TNL/Containers/StaticVector_impl.h index 53530eeb1..177573288 100644 --- a/src/TNL/Containers/StaticVector_impl.h +++ b/src/TNL/Containers/StaticVector_impl.h @@ -45,6 +45,12 @@ StaticVector< Size, Real >::StaticVector( const StaticVector< Size, Real >& v ) { } +template< int Size, typename Real > +StaticVector< Size, Real >::StaticVector( const std::initializer_list< Real > &elems ) +: StaticArray< Size, Real >( elems ) +{ +} + template< int Size, typename Real > bool StaticVector< Size, Real >::setup( const Config::ParameterContainer& parameters, @@ -72,6 +78,7 @@ StaticVector< Size, Real >& StaticVector< Size, Real >::operator =( const StaticVector_& v ) { Algorithms::VectorAssignment< StaticVector< Size, Real >, StaticVector_ >::assign( *this, v ); + return *this; } 
template< int Size, typename Real > @@ -111,6 +118,7 @@ StaticVector< Size, Real >& StaticVector< Size, Real >::operator /= ( const Real return *this; } +#ifdef UNDEF template< int Size, typename Real > __cuda_callable__ StaticVector< Size, Real > StaticVector< Size, Real >::operator + ( const StaticVector& u ) const @@ -150,6 +158,7 @@ Real StaticVector< Size, Real >::operator * ( const StaticVector& u ) const res += this->data[ i ] * u[ i ]; return res; } +#endif template< int Size, typename Real > __cuda_callable__ @@ -239,12 +248,13 @@ StaticVector< Size, Real >::lpNorm( const Real& p ) const return TNL::pow( aux, 1.0 / p ); } +/* template< int Size, typename Real, typename Scalar > __cuda_callable__ StaticVector< Size, Real > operator * ( const Scalar& c, const StaticVector< Size, Real >& u ) { return u * c; -} +}*/ } // namespace Containers } // namespace TNL diff --git a/src/UnitTests/ExpressionTemplatesStaticTest.cpp b/src/UnitTests/ExpressionTemplatesStaticTest.cpp index 5373c1eeb..55baf4eca 100644 --- a/src/UnitTests/ExpressionTemplatesStaticTest.cpp +++ b/src/UnitTests/ExpressionTemplatesStaticTest.cpp @@ -26,14 +26,20 @@ TEST( ExpressionTemplatesStaticTest, Addition ) StaticVector< 6, double > sv2{ 1.5, 1.5, 50, 30.4, 8, 600 }; StaticVector< 6, double > svr1{}; svr1 = sv1 + sv2 + sv2 + sv1; - double temp; for( int i = 0; i < 6; i++){ - temp = sv1[ i ] + sv2[ i ] + sv2[ i ] + sv1[ i ]; - EXPECT_EQ( svr1[ i ], temp ); + EXPECT_EQ( svr1[ i ], sv1[ i ] + sv2[ i ] + sv2[ i ] + sv1[ i ] ); + } + svr1 = sv1 + 2; + for( int i = 0; i < 6; i++){ + EXPECT_EQ( svr1[ i ], sv1[ i ] + 2 ); + } + svr1 = 2 + sv1; + for( int i = 0; i < 6; i++){ + EXPECT_EQ( svr1[ i ], sv1[ i ] + 2 ); } } -/*TEST( ExpressionTemplatesStaticTest, Subtraction ) +TEST( ExpressionTemplatesStaticTest, Subtraction ) { StaticVector< 6, double > sv1{ 1, 1.5, 9, 54, 300.4, 6 }; StaticVector< 6, double > sv2{ 1.5, 1.5, 50, 30.4, 8, 600 }; @@ -44,21 +50,36 @@ TEST( ExpressionTemplatesStaticTest, 
Addition ) temp = sv2[ i ] - sv1[ i ] - sv1[ i ]; EXPECT_EQ( svr1[ i ], temp ); } + svr1 = sv1 - 2; + for( int i = 0; i < 6; i++){ + EXPECT_EQ( svr1[ i ], sv1[ i ] - 2 ); + } + svr1 = 2 - sv1; + for( int i = 0; i < 6; i++){ + EXPECT_EQ( svr1[ i ], 2 - sv1[ i ] ); + } } - -TEST( ExpressionTemplatesStaticTest, MultiplicationLeftSide ) +TEST( ExpressionTemplatesStaticTest, Multiplication ) { StaticVector< 6, double > sv1{ 1, 1.5, 9, 54, 300.4, 6 }; + StaticVector< 6, double > sv2{ 1.5, 1.5, 50, 30.4, 8, 600 }; StaticVector< 6, double > svr1{}; - svr1 = 5 * sv1; - double temp; + svr1 = sv1 * sv2; for( int i = 0; i < 6; i++){ - temp = 5 * sv1[ i ]; - EXPECT_EQ( svr1[ i ], temp ); + EXPECT_EQ( svr1[ i ], sv1[ i ] * sv2[ i ] ); + } + svr1 = sv1 * 2; + for( int i = 0; i < 6; i++){ + EXPECT_EQ( svr1[ i ], sv1[ i ] * 2 ); + } + svr1 = 2 * sv1; + for( int i = 0; i < 6; i++){ + EXPECT_EQ( svr1[ i ], 2 * sv1[ i ] ); } } +/* TEST( ExpressionTemplatesStaticTest, AbsoluteValue ) { StaticVector< 6, double > sv1{ 1, 1.5, 9, 54, -300.4, 6 }; -- GitLab From d8958f80317b9209a0c2345845c32c67a044ac3c Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 18 Apr 2019 14:50:47 +0200 Subject: [PATCH 03/93] Added ET source code :-). 
--- src/TNL/Containers/StaticVector.h | 1 + .../BinaryExpressionTemplate.h | 152 ++++++ .../ExpressionTemplates/CMakeLists.txt | 16 + .../ExpressionTemplatesOperations.h | 54 +++ .../ExpressionVariableType.h | 70 +++ .../ExpressionTemplates/OverloadedOperators.h | 97 ++++ .../StaticVectorExpressions.h | 439 ++++++++++++++++++ .../ExpressionTemplates/VectorExpressions.h | 385 +++++++++++++++ .../VectorExpressionsWithReferences.h | 362 +++++++++++++++ .../expression-templates-static.cpp | 1 + .../expression-templates-static.h | 372 +++++++++++++++ .../expression-templates.cpp | 1 + .../expression-templates.cu | 1 + .../expression-templates.h | 401 ++++++++++++++++ 14 files changed, 2352 insertions(+) create mode 100644 src/TNL/Experimental/ExpressionTemplates/BinaryExpressionTemplate.h create mode 100644 src/TNL/Experimental/ExpressionTemplates/CMakeLists.txt create mode 100644 src/TNL/Experimental/ExpressionTemplates/ExpressionTemplatesOperations.h create mode 100644 src/TNL/Experimental/ExpressionTemplates/ExpressionVariableType.h create mode 100644 src/TNL/Experimental/ExpressionTemplates/OverloadedOperators.h create mode 100644 src/TNL/Experimental/ExpressionTemplates/StaticVectorExpressions.h create mode 100644 src/TNL/Experimental/ExpressionTemplates/VectorExpressions.h create mode 100644 src/TNL/Experimental/ExpressionTemplates/VectorExpressionsWithReferences.h create mode 100644 src/TNL/Experimental/ExpressionTemplates/expression-templates-static.cpp create mode 100644 src/TNL/Experimental/ExpressionTemplates/expression-templates-static.h create mode 100644 src/TNL/Experimental/ExpressionTemplates/expression-templates.cpp create mode 100644 src/TNL/Experimental/ExpressionTemplates/expression-templates.cu create mode 100644 src/TNL/Experimental/ExpressionTemplates/expression-templates.h diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h index a21f34737..954e7c553 100644 --- a/src/TNL/Containers/StaticVector.h +++ 
b/src/TNL/Containers/StaticVector.h @@ -12,6 +12,7 @@ #include #include +#include namespace TNL { namespace Containers { diff --git a/src/TNL/Experimental/ExpressionTemplates/BinaryExpressionTemplate.h b/src/TNL/Experimental/ExpressionTemplates/BinaryExpressionTemplate.h new file mode 100644 index 000000000..6390f5766 --- /dev/null +++ b/src/TNL/Experimental/ExpressionTemplates/BinaryExpressionTemplate.h @@ -0,0 +1,152 @@ +/*************************************************************************** + BinaryExpressionTemplate.h - description + ------------------- + begin : Apr 18, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include + +namespace TNL { + namespace ExpressionTemplates { + +template< typename T1, + typename T2, + template< typename, typename > class Operation, + ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value, + ExpressionVariableType T2Type = ExpressionVariableTypeGetter< T2 >::value > +struct BinaryExpressionTemplate +{ + BinaryExpressionTemplate( const T1& a, const T2& b ){}; + + static T1 evaluate( const T1& a, const T2& b ) + { + return Operation< T1, T2 >::evaluate( a, b ); + } +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariable > +{ + using RealType = typename T1::RealType; + using IsExpressionTemplate = bool; + + BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + + static BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) + { + return BinaryExpressionTemplate( a, b ); + } + + RealType operator[]( const int i ) const + { + return Operation< typename T1::RealType, typename T2::RealType >::evaluate( op1[ i ], op2[ i ] ); + } + + int getSize() const + { + return 
op1.getSize(); + } + + protected: + const T1 &op1; + const T2 &op2; + +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVariable > +{ + using RealType = typename T1::RealType; + using IsExpressionTemplate = bool; + + BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + + static BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) /* factory that reads no member state, so it is static like the vector-vector specialization */ + { + return BinaryExpressionTemplate( a, b ); + } + + RealType operator[]( const int i ) const + { + return Operation< typename T1::RealType, T2 >::evaluate( op1[ i ], op2 ); + } + + int getSize() const + { + return op1.getSize(); + } + + protected: + const T1 &op1; + const T2 op2; /* scalar operand is copied: a reference would bind to a temporary such as the literal in `v + 2` and dangle after the full expression */ + +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVariable > +{ + using RealType = typename T2::RealType; + using IsExpressionTemplate = bool; + + BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + + static BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) /* factory that reads no member state, so it is static like the vector-vector specialization */ + { + return BinaryExpressionTemplate( a, b ); + } + + RealType operator[]( const int i ) const + { + return Operation< T1, typename T2::RealType >::evaluate( op1, op2[ i ] ); + } + + int getSize() const + { + return op2.getSize(); + } + + protected: + const T1 op1; /* scalar operand is copied: a reference would bind to a temporary such as the literal in `2 + v` and dangle after the full expression */ + const T2& op2; + +}; + +template< typename T1, typename T2 >
+BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Multiplication > operator*( const T1 &a, const T2 &b ) +{ + return BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Multiplication >( a, b ); +} + +template< typename T1, typename T2 > +BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Division > operator/( const T1 &a, const T2 &b ) +{ + return BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Division >( a, b ); +} + + } //namespace ExpressionTemplates +} // namespace TNL \ No newline at end of file diff --git a/src/TNL/Experimental/ExpressionTemplates/CMakeLists.txt b/src/TNL/Experimental/ExpressionTemplates/CMakeLists.txt new file mode 100644 index 000000000..e146717c6 --- /dev/null +++ b/src/TNL/Experimental/ExpressionTemplates/CMakeLists.txt @@ -0,0 +1,16 @@ +set( headers StaticVectorExpressions.h + VectorExpressions.h +) + +IF( BUILD_CUDA ) + CUDA_ADD_EXECUTABLE( tnl-expression-templates expression-templates.cu ) +ELSE( BUILD_CUDA ) + ADD_EXECUTABLE( tnl-expression-templates expression-templates.cpp ) + ADD_EXECUTABLE( tnl-expression-templates-static expression-templates-static.cpp ) +ENDIF( BUILD_CUDA ) + +INSTALL( TARGETS tnl-expression-templates + RUNTIME DESTINATION bin + PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE ) + +INSTALL( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Experimental/ExpressionTemplates ) diff --git a/src/TNL/Experimental/ExpressionTemplates/ExpressionTemplatesOperations.h b/src/TNL/Experimental/ExpressionTemplates/ExpressionTemplatesOperations.h new file mode 100644 index 000000000..e0901a0ca --- /dev/null +++ b/src/TNL/Experimental/ExpressionTemplates/ExpressionTemplatesOperations.h @@ -0,0 +1,54 @@ +/*************************************************************************** + ExpressionTemplatesOperations.h - description + ------------------- + begin : Apr 18, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : 
tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace ExpressionTemplates { + +template< typename T1, typename T2 > +struct Addition +{ + static auto evaluate( const T1& a, const T2& b ) -> decltype( a + b ) + { + return a + b; + } +}; + +template< typename T1, typename T2 > +struct Subtraction +{ + static auto evaluate( const T1& a, const T2& b ) -> decltype( a - b ) + { + return a - b; + } +}; + +template< typename T1, typename T2 > +struct Multiplication +{ + static auto evaluate( const T1& a, const T2& b ) -> decltype( a * b ) + { + return a * b; + } +}; + +template< typename T1, typename T2 > +struct Division +{ + static auto evaluate( const T1& a, const T2& b ) -> decltype( a / b ) + { + return a / b; + } +}; + + + } // ExpressionTemplates +} // namespace TNL \ No newline at end of file diff --git a/src/TNL/Experimental/ExpressionTemplates/ExpressionVariableType.h b/src/TNL/Experimental/ExpressionTemplates/ExpressionVariableType.h new file mode 100644 index 000000000..c353d8dd2 --- /dev/null +++ b/src/TNL/Experimental/ExpressionTemplates/ExpressionVariableType.h @@ -0,0 +1,70 @@ +/*************************************************************************** + ExpressionVariableType.h - description + ------------------- + begin : Apr 18, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include + +namespace TNL { + +enum ExpressionVariableType { ArithmeticVariable, VectorVariable, OtherVariable }; + +/** + * SFINAE for checking if T declares a nested IsExpressionTemplate type + */ +template< typename T > +class IsExpressionTemplate +{ +private: + typedef char YesType[1]; + typedef char NoType[2]; + + template< typename C > static YesType& test( typename
C::IsExpressionTemplate ); + template< typename C > static NoType& test(...); + +public: + static constexpr bool value = ( sizeof( test< T >(0) ) == sizeof( YesType ) ); +}; + + +template< typename T > +struct IsVectorType +{ + static constexpr bool value = false; +}; + +template< int Size, + typename Real > +struct IsVectorType< Containers::StaticVector< Size, Real > > +{ + static constexpr bool value = true; +}; + +template< typename T, + bool IsArithmetic = std::is_arithmetic< T >::value, + bool IsVector = IsVectorType< T >::value || IsExpressionTemplate< T >::value > +struct ExpressionVariableTypeGetter +{ + static constexpr ExpressionVariableType value = OtherVariable; +}; + +template< typename T > +struct ExpressionVariableTypeGetter< T, true, false > +{ + static constexpr ExpressionVariableType value = ArithmeticVariable; +}; + +template< typename T > +struct ExpressionVariableTypeGetter< T, false, true > +{ + static constexpr ExpressionVariableType value = VectorVariable; +}; + +} //namespace TNL diff --git a/src/TNL/Experimental/ExpressionTemplates/OverloadedOperators.h b/src/TNL/Experimental/ExpressionTemplates/OverloadedOperators.h new file mode 100644 index 000000000..96df213c0 --- /dev/null +++ b/src/TNL/Experimental/ExpressionTemplates/OverloadedOperators.h @@ -0,0 +1,97 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. 
+ */ + +/* + * File: OverloadedOperators.h + * Author: legler + * + * Created on December 4, 2018, 7:55 PM + */ + +#ifndef OVERLOADEDOPERATORS_H +#define OVERLOADEDOPERATORS_H + +#include + +//using std::vector; + +template< class T > +std::vector operator+(const std::vector &a, const std::vector &b) +{ +std::vector res(a.size()); +for (std::size_t i = 0; i +std::vector operator-(const std::vector &a, const std::vector &b) +{ +std::vector res(a.size()); +for (std::size_t i = 0; i +std::vector operator*(const T &a, const std::vector &b) +{ +std::vector res(a.size()); +for (size_t i = 0; i +std::vector operator*(const std::vector &a, const T &b) +{ +std::vector res(a.size()); +for (size_t i = 0; i +class Vec +{ +public: + std::vector data; + + Vec(const std::size_t size) : data(size) + {} + + Vec(const std::size_t size, const double init) : data(size, init) + {} + + Vec operator+ ( const Vec& a, const Vec& b ) + { + Vec res(a.data.size()); + for (std::size_t i = 0; i res(a.data.size()); + for (std::size_t i = 0; i res(b.data.size()); + for (std::size_t i = 0; i +#include +#include + +namespace TNL { + + +template< typename T1, + typename T2, + template< typename, typename > class Operation, + ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value, + ExpressionVariableType T2Type = ExpressionVariableTypeGetter< T2 >::value > +struct BinaryExpressionTemplate +{ + BinaryExpressionTemplate( const T1& a, const T2& b ){}; + + static T1 evaluate( const T1& a, const T2& b ) + { + return Operation< T1, T2 >::evaluate( a, b ); + } +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariable > +{ + using RealType = typename T1::RealType; + using IsExpressionTemplate = bool; + + BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + + static BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) + { + return 
BinaryExpressionTemplate( a, b ); + } + + RealType operator[]( const int i ) const + { + return Operation< typename T1::RealType, typename T2::RealType >::evaluate( op1[ i ], op2[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } + + protected: + const T1 &op1; + const T2 &op2; + +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVariable > +{ + using RealType = typename T1::RealType; + using IsExpressionTemplate = bool; + + BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + + BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) + { + return BinaryExpressionTemplate( a, b ); + } + + RealType operator[]( const int i ) const + { + return Operation< typename T1::RealType, T2 >::evaluate( op1[ i ], op2 ); + } + + int getSize() const + { + return op1.getSize(); + } + + protected: + const T1 &op1; + const T2 &op2; + +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVariable > +{ + using RealType = typename T2::RealType; + using IsExpressionTemplate = bool; + + BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + + BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) + { + return BinaryExpressionTemplate( a, b ); + } + + RealType operator[]( const int i ) const + { + return Operation< T1, typename T2::RealType >::evaluate( op1, op2[ i ] ); + } + + int getSize() const + { + return op2.getSize(); + } + + protected: + const T1& op1; + const T2& op2; + +}; + + +template< typename T1, typename T2 > +BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Addition > operator+( const T1 &a, const T2 &b ) +{ + return BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Addition >( a, b ); +} + +template< typename T1, typename T2 > 
+BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Subtraction > operator-( const T1 &a, const T2 &b ) +{ + return BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Subtraction >( a, b ); +} + +template< typename T1, typename T2 > +BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Multiplication > operator*( const T1 &a, const T2 &b ) +{ + return BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Multiplication >( a, b ); +} + +template< typename T1, typename T2 > +BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Division > operator/( const T1 &a, const T2 &b ) +{ + return BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Division >( a, b ); +} + + + +template< typename T1 > +class StaticVectorAbsoluteValue +{ + const T1 &op1; + + public: + + StaticVectorAbsoluteValue( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + RealType operator[]( const int i ) const + { + return std::abs( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class StaticVectorExponentialFunction +{ + const T1 &op1; + + public: + + StaticVectorExponentialFunction( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + RealType operator[]( const int i ) const + { + return std::exp( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class StaticVectorNaturalLogarithm +{ + const T1 &op1; + + public: + + StaticVectorNaturalLogarithm( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + RealType operator[]( const int i ) const + { + return std::log( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; +/* +template< typename T1 > +class StaticVectorSquareRoot +{ + const T1 &op1; + + public: + + StaticVectorSquareRoot( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + RealType operator[]( const int i ) const + { + return std::sqrt( op1[ i ] ); + } + + int 
getSize() const + { + return op1.getSize(); + } +};*/ + +template< typename T1 > +class StaticVectorSine +{ + const T1 &op1; + + public: + + StaticVectorSine( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + RealType operator[]( const int i ) const + { + return std::sin( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class StaticVectorCosine +{ + const T1 &op1; + + public: + + StaticVectorCosine( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + RealType operator[]( const int i ) const + { + return std::cos( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class StaticVectorTangent +{ + const T1 &op1; + + public: + + StaticVectorTangent( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + RealType operator[]( const int i ) const + { + return std::tan( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class StaticVectorArcSine +{ + const T1 &op1; + + public: + + StaticVectorArcSine( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + RealType operator[]( const int i ) const + { + return std::asin( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class StaticVectorArcCosine +{ + const T1 &op1; + + public: + + StaticVectorArcCosine( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + RealType operator[]( const int i ) const + { + return std::acos( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class StaticVectorArcTangent +{ + const T1 &op1; + + public: + + StaticVectorArcTangent( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + RealType operator[]( const int i ) const + { + return std::atan( op1[ i ] ); + } + + int getSize() const + { + return 
op1.getSize(); + } +}; + + +template< typename T1 > +StaticVectorAbsoluteValue< T1 > abs( const T1 &a ) +{ + return StaticVectorAbsoluteValue< T1 >( a ); +} + +template< typename T1 > +StaticVectorExponentialFunction< T1 > exp( const T1 &a ) +{ + return StaticVectorExponentialFunction< T1 >( a ); +} + +template< typename T1 > +StaticVectorNaturalLogarithm< T1 > log( const T1 &a ) +{ + return StaticVectorNaturalLogarithm< T1 >( a ); +} +/* +template< typename T1 > +StaticVectorSquareRoot< T1 > sqrt( const T1 &a ) +{ + return StaticVectorSquareRoot< T1 >( a ); +}*/ + +/* +template< typename T1 > +StaticVectorSine< T1 > sin( const T1 &a ) +{ + return StaticVectorSine< T1 >( a ); +} + +template< typename T1 > +StaticVectorCosine< T1 > cos( const T1 &a ) +{ + return StaticVectorCosine< T1 >( a ); +} + +template< typename T1 > +StaticVectorTangent< T1 > tan( const T1 &a ) +{ + return StaticVectorTangent< T1 >( a ); +} + +template< typename T1 > +StaticVectorArcSine< T1 > asin( const T1 &a ) +{ + return StaticVectorArcSine< T1 >( a ); +} + +template< typename T1 > +StaticVectorArcCosine< T1 > acos( const T1 &a ) +{ + return StaticVectorArcCosine< T1 >( a ); +} + +template< typename T1 > +StaticVectorArcTangent< T1 > atan( const T1 &a ) +{ + return StaticVectorArcTangent< T1 >( a ); +}*/ + +} //namespace TNL diff --git a/src/TNL/Experimental/ExpressionTemplates/VectorExpressions.h b/src/TNL/Experimental/ExpressionTemplates/VectorExpressions.h new file mode 100644 index 000000000..45e930ae8 --- /dev/null +++ b/src/TNL/Experimental/ExpressionTemplates/VectorExpressions.h @@ -0,0 +1,385 @@ +/* + * File: VectorExpressions.h + * Author: Vojtěch Legler + * + * Created on November 29, 2018, 2:53 PM + */ + +#pragma once + +#include + +template< typename T1, typename T2 > +class VectorAddition +{ + const T1 op1; + const T2 op2; + + public: + + VectorAddition( const T1 &a, const T2 &b ): op1( a ), op2( b ){} + + VectorAddition( const VectorAddition &v ): op1( v.op1 ), op2( v.op2 ){} 
+ + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return op1[ i ] + op2[ i ]; + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1, typename T2 > +class VectorSubtraction +{ + const T1 op1; + const T2 op2; + + public: + + VectorSubtraction( const T1& a, const T2& b ): op1( a ), op2( b ){} + + VectorSubtraction( const VectorSubtraction &v ): op1( v.op1 ), op2( v.op2 ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return op1[ i ] - op2[ i ]; + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename Scalar, typename T2 > +class VectorMultiplicationLeftSide +{ + const Scalar c; + const T2 op2; + + public: + + VectorMultiplicationLeftSide( const Scalar& a, const T2& b ): c( a ), op2( b ){} + + VectorMultiplicationLeftSide( const VectorMultiplicationLeftSide &v ): c( v.c ), op2( v.op2 ){} + + using RealType = typename T2::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return c * op2[ i ]; + } + + int getSize() const + { + return op2.getSize(); + } +}; + +template< typename T1 > +class VectorAbsoluteValue +{ + const T1 op1; + + public: + + VectorAbsoluteValue( const T1& a ): op1( a ){} + + VectorAbsoluteValue( const VectorAbsoluteValue &v ): op1( v.op1 ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return std::abs( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class VectorExponentialFunction +{ + const T1 op1; + + public: + + VectorExponentialFunction( const T1& a ): op1( a ){} + + VectorExponentialFunction( const VectorExponentialFunction &v ): op1( v.op1 ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return std::exp( op1[ i ] ); + } + + int 
getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class VectorNaturalLogarithm +{ + const T1 op1; + + public: + + VectorNaturalLogarithm( const T1& a ): op1( a ){} + + VectorNaturalLogarithm( const VectorNaturalLogarithm &v ): op1( v.op1 ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return std::log( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class VectorSine +{ + const T1 op1; + + public: + + VectorSine( const T1& a ): op1( a ){} + + VectorSine( const VectorSine &v ): op1( v.op1 ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return std::sin( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class VectorCosine +{ + const T1 op1; + + public: + + VectorCosine( const T1& a ): op1( a ){} + + VectorCosine( const VectorCosine &v ): op1( v.op1 ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return std::cos( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class VectorTangent +{ + const T1 op1; + + public: + + VectorTangent( const T1& a ): op1( a ){} + + VectorTangent( const VectorTangent &v ): op1( v.op1 ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return std::tan( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class VectorArcSine +{ + const T1 op1; + + public: + + VectorArcSine( const T1& a ): op1( a ){} + + VectorArcSine( const VectorArcSine &v ): op1( v.op1 ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return std::asin( op1[ i ] ); + } + + int getSize() const + 
{ + return op1.getSize(); + } +}; + +template< typename T1 > +class VectorArcCosine +{ + const T1 op1; + + public: + + VectorArcCosine( const T1& a ): op1( a ){} + + VectorArcCosine( const VectorArcCosine &v ): op1( v.op1 ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return std::acos( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class VectorArcTangent +{ + const T1 op1; + + public: + + VectorArcTangent( const T1& a ): op1( a ){} + + VectorArcTangent( const VectorArcTangent &v ): op1( v.op1 ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return std::atan( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1, typename T2 > +VectorAddition< T1, T2 > operator+( const T1 &a, const T2 &b ) +{ + return VectorAddition< T1, T2 >( a, b ); +} + +template< typename T1, typename T2 > +VectorSubtraction< T1, T2 > operator-( const T1 &a, const T2 &b ) +{ + return VectorSubtraction< T1, T2 >( a, b ); +} + +template< typename Scalar, typename T2 > +VectorMultiplicationLeftSide< Scalar, T2 > operator*( const Scalar &a, const T2 &b ) +{ + return VectorMultiplicationLeftSide< Scalar, T2 >( a, b ); +} + +template< typename T1 > +VectorAbsoluteValue< T1 > abs( const T1 &a ) +{ + return VectorAbsoluteValue< T1 >( a ); +} + +template< typename T1 > +VectorExponentialFunction< T1 > exp( const T1 &a ) +{ + return VectorExponentialFunction< T1 >( a ); +} + +template< typename T1 > +VectorNaturalLogarithm< T1 > log( const T1 &a ) +{ + return VectorNaturalLogarithm< T1 >( a ); +} + +template< typename T1 > +VectorSine< T1 > sin( const T1 &a ) +{ + return VectorSine< T1 >( a ); +} + +template< typename T1 > +VectorCosine< T1 > cos( const T1 &a ) +{ + return VectorCosine< T1 >( a ); +} + +template< typename T1 > +VectorTangent< T1 > tan( const T1 &a 
) +{ + return VectorTangent< T1 >( a ); +} + +template< typename T1 > +VectorArcSine< T1 > asin( const T1 &a ) +{ + return VectorArcSine< T1 >( a ); +} + +template< typename T1 > +VectorArcCosine< T1 > acos( const T1 &a ) +{ + return VectorArcCosine< T1 >( a ); +} + +template< typename T1 > +VectorArcTangent< T1 > atan( const T1 &a ) +{ + return VectorArcTangent< T1 >( a ); +} diff --git a/src/TNL/Experimental/ExpressionTemplates/VectorExpressionsWithReferences.h b/src/TNL/Experimental/ExpressionTemplates/VectorExpressionsWithReferences.h new file mode 100644 index 000000000..b2feb1a94 --- /dev/null +++ b/src/TNL/Experimental/ExpressionTemplates/VectorExpressionsWithReferences.h @@ -0,0 +1,362 @@ +/* + * File: VectorExpressions.h + * Author: Vojtěch Legler + * + * Created on November 29, 2018, 2:53 PM + */ + +#pragma once + +#include + +template< typename T1, typename T2 > +class VectorAddition +{ + const T1 &op1; + const T2 &op2; + + public: + + VectorAddition( const T1& a, const T2& b ): op1( a ), op2( b ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return op1[ i ] + op2[ i ]; + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1, typename T2 > +class VectorSubtraction +{ + const T1 &op1; + const T2 &op2; + + public: + + VectorSubtraction( const T1& a, const T2& b ): op1( a ), op2( b ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return op1[ i ] - op2[ i ]; + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename Scalar, typename T2 > +class VectorMultiplicationLeftSide +{ + const Scalar &c; + const T2 &op2; + + public: + + VectorMultiplicationLeftSide( const Scalar& a, const T2& b ): c( a ), op2( b ){} + + using RealType = typename T2::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return c * op2[ i ]; + } + + int getSize() 
const + { + return op2.getSize(); + } +}; + +template< typename T1 > +class VectorAbsoluteValue +{ + const T1 &op1; + + public: + + VectorAbsoluteValue( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return std::abs( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class VectorExponentialFunction +{ + const T1 &op1; + + public: + + VectorExponentialFunction( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return std::exp( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class VectorNaturalLogarithm +{ + const T1 &op1; + + public: + + VectorNaturalLogarithm( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return std::log( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class VectorSine +{ + const T1 &op1; + + public: + + VectorSine( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return std::sin( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class VectorCosine +{ + const T1 &op1; + + public: + + VectorCosine( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return std::cos( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class VectorTangent +{ + const T1 &op1; + + public: + + VectorTangent( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return 
std::tan( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class VectorArcSine +{ + const T1 &op1; + + public: + + VectorArcSine( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return std::asin( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class VectorArcCosine +{ + const T1 &op1; + + public: + + VectorArcCosine( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return std::acos( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + +template< typename T1 > +class VectorArcTangent +{ + const T1 &op1; + + public: + + VectorArcTangent( const T1& a ): op1( a ){} + + using RealType = typename T1::RealType; + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return std::atan( op1[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } +}; + + +template< typename T1, typename T2 > +VectorAddition< T1, T2 > operator+( const T1 &a, const T2 &b ) +{ + return VectorAddition< T1, T2 >( a, b ); +} + +template< typename T1, typename T2 > +VectorSubtraction< T1, T2 > operator-( const T1 &a, const T2 &b ) +{ + return VectorSubtraction< T1, T2 >( a, b ); +} + +template< typename Scalar, typename T2 > +VectorMultiplicationLeftSide< Scalar, T2 > operator*( const Scalar &a, const T2 &b ) +{ + return VectorMultiplicationLeftSide< Scalar, T2 >( a, b ); +} + +template< typename T1 > +VectorAbsoluteValue< T1 > abs( const T1 &a ) +{ + return VectorAbsoluteValue< T1 >( a ); +} + +template< typename T1 > +VectorExponentialFunction< T1 > exp( const T1 &a ) +{ + return VectorExponentialFunction< T1 >( a ); +} + +template< typename T1 > +VectorNaturalLogarithm< T1 > log( const T1 &a ) +{ + return VectorNaturalLogarithm< T1 >( a ); +} + +template< 
typename T1 > +VectorSine< T1 > sin( const T1 &a ) +{ + return VectorSine< T1 >( a ); +} + +template< typename T1 > +VectorCosine< T1 > cos( const T1 &a ) +{ + return VectorCosine< T1 >( a ); +} + +template< typename T1 > +VectorTangent< T1 > tan( const T1 &a ) +{ + return VectorTangent< T1 >( a ); +} + +template< typename T1 > +VectorArcSine< T1 > asin( const T1 &a ) +{ + return VectorArcSine< T1 >( a ); +} + +template< typename T1 > +VectorArcCosine< T1 > acos( const T1 &a ) +{ + return VectorArcCosine< T1 >( a ); +} + +template< typename T1 > +VectorArcTangent< T1 > atan( const T1 &a ) +{ + return VectorArcTangent< T1 >( a ); +} diff --git a/src/TNL/Experimental/ExpressionTemplates/expression-templates-static.cpp b/src/TNL/Experimental/ExpressionTemplates/expression-templates-static.cpp new file mode 100644 index 000000000..36aa43b18 --- /dev/null +++ b/src/TNL/Experimental/ExpressionTemplates/expression-templates-static.cpp @@ -0,0 +1 @@ +#include "expression-templates-static.h" diff --git a/src/TNL/Experimental/ExpressionTemplates/expression-templates-static.h b/src/TNL/Experimental/ExpressionTemplates/expression-templates-static.h new file mode 100644 index 000000000..5bf29e617 --- /dev/null +++ b/src/TNL/Experimental/ExpressionTemplates/expression-templates-static.h @@ -0,0 +1,372 @@ +#pragma once + +#include +#include +#include +#include "OverloadedOperators.h" +#include + +using namespace std; +using namespace TNL; +using namespace TNL::Containers; + +int main() +{ + StaticVector< 10, double > sv1( 1.5 ); + StaticVector< 100, double > sv2( 1.5 ); + StaticVector< 500, double > sv3( 1.5 ); + StaticVector< 1000, double > sv4( 1.5 ); + StaticVector< 2000, double > sv5( 1.5 ); + StaticVector< 5000, double > sv6( 1.5 ); + StaticVector< 10, double > svr1( 0.0 ); + StaticVector< 10, double > svr1_( 0.0 ); + StaticVector< 100, double > svr2( 0.0 ); + StaticVector< 100, double > svr2_( 0.0 ); + StaticVector< 500, double > svr3( 0.0 ); + StaticVector< 500, double > 
svr3_( 0.0 ); + StaticVector< 1000, double > svr4( 0.0 ); + StaticVector< 1000, double > svr4_( 0.0 ); + StaticVector< 2000, double > svr5( 0.0 ); + StaticVector< 2000, double > svr5_( 0.0 ); + StaticVector< 5000, double > svr6( 0.0 ); + StaticVector< 5000, double > svr6_( 0.0 ); + + std::vector v1( 10, 1.5 ); + std::vector v2( 100, 1.5 ); + std::vector v3( 500, 1.5 ); + std::vector v4( 1000, 1.5 ); + std::vector v5( 2000, 1.5 ); + std::vector v6( 5000, 1.5 ); + std::vector vr1(10), vr2(100), vr3(500), vr4(1000), vr5(2000), vr6(5000), vr1_(10), vr2_(100), vr3_(500), vr4_(1000), vr5_(2000), vr6_(5000); + std::vector cvr1(10), cvr2(100), cvr3(500), cvr4(1000), cvr5(2000), cvr6(5000), cvr1_(10), cvr2_(100), cvr3_(500), cvr4_(1000), cvr5_(2000), cvr6_(5000); + + TNL::Timer t1; + TNL::Timer t2; + TNL::Timer t3; + + long double stm1 = 0, stm2 = 0, stm3 = 0, stm4 = 0, stm5 = 0, stm6 = 0, stm1_ = 0, stm2_ = 0, stm3_ = 0, stm4_ = 0, stm5_ = 0, stm6_ = 0; + long double tm1 = 0, tm2 = 0, tm3 = 0, tm4 = 0, tm5 = 0, tm6 = 0, tm1_ = 0, tm2_ = 0, tm3_ = 0, tm4_ = 0, tm5_ = 0, tm6_ = 0; + long double ctm1 = 0, ctm2 = 0, ctm3 = 0, ctm4 = 0, ctm5 = 0, ctm6 = 0, ctm1_ = 0, ctm2_ = 0, ctm3_ = 0, ctm4_ = 0, ctm5_ = 0, ctm6_ = 0; + + int numb = 50000; + + //static vectors + + t1.start(); + for( int i = 0; i < numb; i++ ) + svr1 = sv1 + sv1; + t1.stop(); + stm1 = t1.getCPUCycles(); + + t1.reset(); + t1.start(); + for( int i = 0; i < numb; i++ ) + svr2 = sv2 + sv2; + t1.stop(); + stm2 = t1.getCPUCycles(); + + t1.reset(); + t1.start(); + for( int i = 0; i < numb; i++ ) + svr3 = sv3 + sv3; + t1.stop(); + stm3 = t1.getCPUCycles(); + + t1.reset(); + t1.start(); + for( int i = 0; i < numb; i++ ) + svr4 = sv4 + sv4; + t1.stop(); + stm4 = t1.getCPUCycles(); + + t1.reset(); + t1.start(); + for( int i = 0; i < numb; i++ ) + svr5 = sv5 + sv5; + t1.stop(); + stm5 = t1.getCPUCycles(); + + t1.reset(); + t1.start(); + for( int i = 0; i < numb; i++ ) + svr6 = sv6 + sv6; + t1.stop(); + stm6 = 
t1.getCPUCycles(); + + t1.reset(); + t1.start(); + for( int i = 0; i < numb; i++ ) + svr1_ = sv1 + sv1 + sv1 + sv1 + sv1 + sv1 + sv1 + sv1 + sv1 + sv1; + t1.stop(); + stm1_ = t1.getCPUCycles(); + + t1.reset(); + t1.start(); + for( int i = 0; i < numb; i++ ) + svr2_ = sv2 + sv2 + sv2 + sv2 + sv2 + sv2 + sv2 + sv2 + sv2 + sv2; + t1.stop(); + stm2_ = t1.getCPUCycles(); + + t1.reset(); + t1.start(); + for( int i = 0; i < numb; i++ ) + svr3_ = sv3 + sv3 + sv3 + sv3 + sv3 + sv3 + sv3 + sv3 + sv3 + sv3; + t1.stop(); + stm3_ = t1.getCPUCycles(); + + t1.reset(); + t1.start(); + for( int i = 0; i < numb; i++ ) + svr4_ = sv4 + sv4 + sv4 + sv4 + sv4 + sv4 + sv4 + sv4 + sv4 + sv4; + t1.stop(); + stm4_ = t1.getCPUCycles(); + + t1.reset(); + t1.start(); + for( int i = 0; i < numb; i++ ) + svr5_ = sv5 + sv5 + sv5 + sv5 + sv5 + sv5 + sv5 + sv5 + sv5 + sv5; + t1.stop(); + stm5_ = t1.getCPUCycles(); + + t1.reset(); + t1.start(); + for( int i = 0; i < numb; i++ ) + svr6_ = sv6 + sv6 + sv6 + sv6 + sv6 + sv6 + sv6 + sv6 + sv6 + sv6; + t1.stop(); + stm6_ = t1.getCPUCycles(); + + //overloaded operators + + t2.start(); + for( int i = 0; i < numb; i++ ) + vr1 = v1 + v1; + t2.stop(); + tm1 = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr2 = v2 + v2; + t2.stop(); + tm2 = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr3 = v3 + v3; + t2.stop(); + tm3 = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr4 = v4 + v4; + t2.stop(); + tm4 = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr5 = v5 + v5; + t2.stop(); + tm5 = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr6 = v6 + v6; + t2.stop(); + tm6 = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr1_ = v1 + v1 + v1 + v1 + v1 + v1 + v1 + v1 + v1 + v1; + t2.stop(); + tm1_ = t2.getCPUCycles(); + + t2.reset(); + 
t2.start(); + for( int i = 0; i < numb; i++ ) + vr2_ = v2 + v2 + v2 + v2 + v2 + v2 + v2 + v2 + v2 + v2; + t2.stop(); + tm2_ = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr3_ = v3 + v3 + v3 + v3 + v3 + v3 + v3 + v3 + v3 + v3; + t2.stop(); + tm3_ = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr4_ = v4 + v4 + v4 + v4 + v4 + v4 + v4 + v4 + v4 + v4; + t2.stop(); + tm4_ = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr5_ = v5 + v5 + v5 + v5 + v5 + v5 + v5 + v5 + v5 + v5; + t2.stop(); + tm5_ = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr6_ = v6 + v6 + v6 + v6 + v6 + v6 + v6 + v6 + v6 + v6; + t2.stop(); + tm6_ = t2.getCPUCycles(); + + //pure c + + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v1.size(); ++i) + { + cvr1[ i ] = v1[ i ] + v1[ i ]; + } + } + t3.stop(); + ctm1 = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v2.size(); ++i) + { + cvr2[ i ] = v2[ i ] + v2[ i ]; + } + } + t3.stop(); + ctm2 = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v3.size(); ++i) + { + cvr3[ i ] = v3[ i ] + v3[ i ]; + } + } + t3.stop(); + ctm3 = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v4.size(); ++i) + { + cvr4[ i ] = v4[ i ] + v4[ i ]; + } + } + t3.stop(); + ctm4 = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v5.size(); ++i) + { + cvr5[ i ] = v5[ i ] + v5[ i ]; + } + } + t3.stop(); + ctm5 = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v6.size(); ++i) + { + cvr6[ i ] = v6[ i ] + v6[ i ]; + } + } + t3.stop(); + ctm6 = t3.getCPUCycles(); + + t3.reset(); 
+ t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v1.size(); ++i) + { + cvr1_[ i ] = v1[ i ] + v1[ i ] + v1[ i ] + v1[ i ] + v1[ i ] + v1[ i ] + v1[ i ] + v1[ i ] + v1[ i ] + v1[ i ]; + } + } + t3.stop(); + ctm1_ = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v2.size(); ++i) + { + cvr2_[ i ] = v2[ i ] + v2[ i ] + v2[ i ] + v2[ i ] + v2[ i ] + v2[ i ] + v2[ i ] + v2[ i ] + v2[ i ] + v2[ i ]; + } + } + t3.stop(); + ctm2_ = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v3.size(); ++i) + { + cvr3_[ i ] = v3[ i ] + v3[ i ] + v3[ i ] + v3[ i ] + v3[ i ] + v3[ i ] + v3[ i ] + v3[ i ] + v3[ i ] + v3[ i ]; + } + } + t3.stop(); + ctm3_ = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v4.size(); ++i) + { + cvr4_[ i ] = v4[ i ] + v4[ i ] + v4[ i ] + v4[ i ] + v4[ i ] + v4[ i ] + v4[ i ] + v4[ i ] + v4[ i ] + v4[ i ]; + } + } + t3.stop(); + ctm4_ = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v5.size(); ++i) + { + cvr5_[ i ] = v5[ i ] + v5[ i ] + v5[ i ] + v5[ i ] + v5[ i ] + v5[ i ] + v5[ i ] + v5[ i ] + v5[ i ] + v5[ i ]; + } + } + t3.stop(); + ctm5_ = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v6.size(); ++i) + { + cvr6_[ i ] = v6[ i ] + v6[ i ] + v6[ i ] + v6[ i ] + v6[ i ] + v6[ i ] + v6[ i ] + v6[ i ] + v6[ i ] + v6[ i ]; + } + } + t3.stop(); + ctm6_ = t3.getCPUCycles(); + + std::cout << std::fixed; + std::cout << std::setprecision(2); + + std::cout << "addition of 2 vectors" << std::endl; + std::cout << "size\t\t" << "10" << "\t\t" << "100" << "\t\t" << "500" << "\t\t" << "1000" << "\t\t" << "2000" << "\t\t" << "5000" << std::endl; + std::cout << "static vectors" << "\t" << stm1 << "\t\t" << stm2 << 
"\t\t" << stm3 << "\t\t" << stm4 << "\t\t" << stm5 << "\t\t" << stm6 << std::endl; + std::cout << "overloaded ops." << "\t" << tm1 << "\t" << tm2 << "\t" << tm3 << "\t" << tm4 << "\t" << tm5 << "\t" << tm6 << std::endl; + std::cout << "pure c" << "\t\t" << ctm1 << "\t" << ctm2 << "\t" << ctm3 << "\t" << ctm4 << "\t" << ctm5 << "\t" << ctm6 << "\n" << std::endl; + + std::cout << "addition of 10 vectors" << std::endl; + std::cout << "size\t\t" << "10" << "\t\t" << "100" << "\t\t" << "500" << "\t\t" << "1000" << "\t\t" << "2000" << "\t\t" << "5000" << std::endl; + std::cout << "static vectors" << "\t" << stm1_ << "\t\t" << stm2_ << "\t\t" << stm3_ << "\t\t" << stm4_ << "\t\t" << stm5_ << "\t\t" << stm6_ << std::endl; + std::cout << "overloaded ops." << "\t" << tm1_ << "\t" << tm2_ << "\t" << tm3_ << "\t" << tm4_ << "\t" << tm5_ << "\t" << tm6_ << std::endl; + std::cout << "pure c" << "\t\t" << ctm1_ << "\t" << ctm2_ << "\t" << ctm3_ << "\t" << ctm4_ << "\t" << ctm5_ << "\t" << ctm6_ << std::endl; + + return 0; +} diff --git a/src/TNL/Experimental/ExpressionTemplates/expression-templates.cpp b/src/TNL/Experimental/ExpressionTemplates/expression-templates.cpp new file mode 100644 index 000000000..c609e179c --- /dev/null +++ b/src/TNL/Experimental/ExpressionTemplates/expression-templates.cpp @@ -0,0 +1 @@ +#include "expression-templates.h" diff --git a/src/TNL/Experimental/ExpressionTemplates/expression-templates.cu b/src/TNL/Experimental/ExpressionTemplates/expression-templates.cu new file mode 100644 index 000000000..c609e179c --- /dev/null +++ b/src/TNL/Experimental/ExpressionTemplates/expression-templates.cu @@ -0,0 +1 @@ +#include "expression-templates.h" diff --git a/src/TNL/Experimental/ExpressionTemplates/expression-templates.h b/src/TNL/Experimental/ExpressionTemplates/expression-templates.h new file mode 100644 index 000000000..86411ee08 --- /dev/null +++ b/src/TNL/Experimental/ExpressionTemplates/expression-templates.h @@ -0,0 +1,401 @@ +#pragma once + 
+#include +#include +#include +#include "OverloadedOperators.h" +#include +#include +//#include + +using namespace std; +using namespace TNL; +using namespace TNL::Containers; + +int main() +{ + + Vector< double, Devices::Host, int > d1( 10 ); + for( int i = 0; i < 10; i++) + d1[i] = 1.5; + Vector< double, Devices::Host, int > d2( 100 ); + for( int i = 0; i < 100; i++) + d2[i] = 1.5; + Vector< double, Devices::Host, int > d3( 500 ); + for( int i = 0; i < 500; i++) + d3[i] = 1.5; + Vector< double, Devices::Host, int > d4( 1000 ); + for( int i = 0; i < 1000; i++) + d4[i] = 1.5; + Vector< double, Devices::Host, int > d5( 2000 ); + for( int i = 0; i < 2000; i++) + d5[i] = 1.5; + Vector< double, Devices::Host, int > d6( 5000 ); + for( int i = 0; i < 5000; i++) + d6[i] = 1.5; + Vector< double, Devices::Host, int > dr1( 10 ); + Vector< double, Devices::Host, int > dr2( 100 ); + Vector< double, Devices::Host, int > dr3( 500 ); + Vector< double, Devices::Host, int > dr4( 1000 ); + Vector< double, Devices::Host, int > dr5( 2000 ); + Vector< double, Devices::Host, int > dr6( 5000 ); + + VectorView< double, Devices::Host, int > dv1( d1 ); + VectorView< double, Devices::Host, int > dv2( d2 ); + VectorView< double, Devices::Host, int > dv3( d3 ); + VectorView< double, Devices::Host, int > dv4( d4 ); + VectorView< double, Devices::Host, int > dv5( d5 ); + VectorView< double, Devices::Host, int > dv6( d6 ); + VectorView< double, Devices::Host, int > dvr1( dr1 ); + VectorView< double, Devices::Host, int > dvr2( dr2 ); + VectorView< double, Devices::Host, int > dvr3( dr3 ); + VectorView< double, Devices::Host, int > dvr4( dr4 ); + VectorView< double, Devices::Host, int > dvr5( dr5 ); + VectorView< double, Devices::Host, int > dvr6( dr6 ); + VectorView< double, Devices::Host, int > dvr1_( dr1 ); + VectorView< double, Devices::Host, int > dvr2_( dr2 ); + VectorView< double, Devices::Host, int > dvr3_( dr3 ); + VectorView< double, Devices::Host, int > dvr4_( dr4 ); + VectorView< 
double, Devices::Host, int > dvr5_( dr5 ); + VectorView< double, Devices::Host, int > dvr6_( dr6 ); + + std::vector v1( 10, 1.5 ); + std::vector v2( 100, 1.5 ); + std::vector v3( 500, 1.5 ); + std::vector v4( 1000, 1.5 ); + std::vector v5( 2000, 1.5 ); + std::vector v6( 5000, 1.5 ); + std::vector vr1(10), vr2(100), vr3(500), vr4(1000), vr5(2000), vr6(5000), vr1_(10), vr2_(100), vr3_(500), vr4_(1000), vr5_(2000), vr6_(5000); + std::vector cvr1(10), cvr2(100), cvr3(500), cvr4(1000), cvr5(2000), cvr6(5000), cvr1_(10), cvr2_(100), cvr3_(500), cvr4_(1000), cvr5_(2000), cvr6_(5000); + + TNL::Timer t2; + TNL::Timer t3; + TNL::Timer t4; + + long double dtm1 = 0, dtm2 = 0, dtm3 = 0, dtm4 = 0, dtm5 = 0, dtm6 = 0, dtm1_ = 0, dtm2_ = 0, dtm3_ = 0, dtm4_ = 0, dtm5_ = 0, dtm6_ = 0; + long double tm1 = 0, tm2 = 0, tm3 = 0, tm4 = 0, tm5 = 0, tm6 = 0, tm1_ = 0, tm2_ = 0, tm3_ = 0, tm4_ = 0, tm5_ = 0, tm6_ = 0; + long double ctm1 = 0, ctm2 = 0, ctm3 = 0, ctm4 = 0, ctm5 = 0, ctm6 = 0, ctm1_ = 0, ctm2_ = 0, ctm3_ = 0, ctm4_ = 0, ctm5_ = 0, ctm6_ = 0; + + int numb = 50000; + + //dynamic vectors + + t4.start(); + for( int i = 0; i < numb; i++ ) + dvr1.evaluate( dv1 + dv1 ); + t4.stop(); + dtm1 = t4.getCPUCycles(); + + t4.reset(); + t4.start(); + for( int i = 0; i < numb; i++ ) + dvr2.evaluate( dv2 + dv2 ); + t4.stop(); + dtm2 = t4.getCPUCycles(); + + t4.reset(); + t4.start(); + for( int i = 0; i < numb; i++ ) + dvr3.evaluate( dv3 + dv3 ); + t4.stop(); + dtm3 = t4.getCPUCycles(); + + t4.reset(); + t4.start(); + for( int i = 0; i < numb; i++ ) + dvr4.evaluate( dv4 + dv4 ); + t4.stop(); + dtm4 = t4.getCPUCycles(); + + t4.reset(); + t4.start(); + for( int i = 0; i < numb; i++ ) + dvr5.evaluate( dv5 + dv5 ); + t4.stop(); + dtm5 = t4.getCPUCycles(); + + t4.reset(); + t4.start(); + for( int i = 0; i < numb; i++ ) + dvr6.evaluate( dv6 + dv6 ); + t4.stop(); + dtm6 = t4.getCPUCycles(); + + t4.reset(); + t4.start(); + for( int i = 0; i < numb; i++ ) + dvr1_.evaluate( dv1 + dv1 + dv1 + dv1 + dv1 + 
dv1 + dv1 + dv1 + dv1 + dv1 ); + t4.stop(); + dtm1_ = t4.getCPUCycles(); + + t4.reset(); + t4.start(); + for( int i = 0; i < numb; i++ ) + dvr2_.evaluate( dv2 + dv2 + dv2 + dv2 + dv2 + dv2 + dv2 + dv2 + dv2 + dv2 ); + t4.stop(); + dtm2_ = t4.getCPUCycles(); + + t4.reset(); + t4.start(); + for( int i = 0; i < numb; i++ ) + dvr3_.evaluate( dv3 + dv3 + dv3 + dv3 + dv3 + dv3 + dv3 + dv3 + dv3 + dv3 ); + t4.stop(); + dtm3_ = t4.getCPUCycles(); + + t4.reset(); + t4.start(); + for( int i = 0; i < numb; i++ ) + dvr4_.evaluate( dv4 + dv4 + dv4 + dv4 + dv4 + dv4 + dv4 + dv4 + dv4 + dv4 ); + t4.stop(); + dtm4_ = t4.getCPUCycles(); + + t4.reset(); + t4.start(); + for( int i = 0; i < numb; i++ ) + dvr5_.evaluate( dv5 + dv5 + dv5 + dv5 + dv5 + dv5 + dv5 + dv5 + dv5 + dv5 ); + t4.stop(); + dtm5_ = t4.getCPUCycles(); + + t4.reset(); + t4.start(); + for( int i = 0; i < numb; i++ ) + dvr6_.evaluate( dv6 + dv6 + dv6 + dv6 + dv6 + dv6 + dv6 + dv6 + dv6 + dv6 ); + t4.stop(); + dtm6_ = t4.getCPUCycles(); + + + //overloaded operators + + t2.start(); + for( int i = 0; i < numb; i++ ) + vr1 = v1 + v1; + t2.stop(); + tm1 = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr2 = v2 + v2; + t2.stop(); + tm2 = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr3 = v3 + v3; + t2.stop(); + tm3 = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr4 = v4 + v4; + t2.stop(); + tm4 = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr5 = v5 + v5; + t2.stop(); + tm5 = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr6 = v6 + v6; + t2.stop(); + tm6 = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr1_ = v1 + v1 + v1 + v1 + v1 + v1 + v1 + v1 + v1 + v1; + t2.stop(); + tm1_ = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr2_ = v2 + v2 + v2 + v2 + v2 + 
v2 + v2 + v2 + v2 + v2; + t2.stop(); + tm2_ = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr3_ = v3 + v3 + v3 + v3 + v3 + v3 + v3 + v3 + v3 + v3; + t2.stop(); + tm3_ = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr4_ = v4 + v4 + v4 + v4 + v4 + v4 + v4 + v4 + v4 + v4; + t2.stop(); + tm4_ = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr5_ = v5 + v5 + v5 + v5 + v5 + v5 + v5 + v5 + v5 + v5; + t2.stop(); + tm5_ = t2.getCPUCycles(); + + t2.reset(); + t2.start(); + for( int i = 0; i < numb; i++ ) + vr6_ = v6 + v6 + v6 + v6 + v6 + v6 + v6 + v6 + v6 + v6; + t2.stop(); + tm6_ = t2.getCPUCycles(); + + //pure c + + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v1.size(); ++i) + { + cvr1[ i ] = v1[ i ] + v1[ i ]; + } + } + t3.stop(); + ctm1 = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v2.size(); ++i) + { + cvr2[ i ] = v2[ i ] + v2[ i ]; + } + } + t3.stop(); + ctm2 = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v3.size(); ++i) + { + cvr3[ i ] = v3[ i ] + v3[ i ]; + } + } + t3.stop(); + ctm3 = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v4.size(); ++i) + { + cvr4[ i ] = v4[ i ] + v4[ i ]; + } + } + t3.stop(); + ctm4 = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v5.size(); ++i) + { + cvr5[ i ] = v5[ i ] + v5[ i ]; + } + } + t3.stop(); + ctm5 = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v6.size(); ++i) + { + cvr6[ i ] = v6[ i ] + v6[ i ]; + } + } + t3.stop(); + ctm6 = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < 
v1.size(); ++i) + { + cvr1_[ i ] = v1[ i ] + v1[ i ] + v1[ i ] + v1[ i ] + v1[ i ] + v1[ i ] + v1[ i ] + v1[ i ] + v1[ i ] + v1[ i ]; + } + } + t3.stop(); + ctm1_ = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v2.size(); ++i) + { + cvr2_[ i ] = v2[ i ] + v2[ i ] + v2[ i ] + v2[ i ] + v2[ i ] + v2[ i ] + v2[ i ] + v2[ i ] + v2[ i ] + v2[ i ]; + } + } + t3.stop(); + ctm2_ = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v3.size(); ++i) + { + cvr3_[ i ] = v3[ i ] + v3[ i ] + v3[ i ] + v3[ i ] + v3[ i ] + v3[ i ] + v3[ i ] + v3[ i ] + v3[ i ] + v3[ i ]; + } + } + t3.stop(); + ctm3_ = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v4.size(); ++i) + { + cvr4_[ i ] = v4[ i ] + v4[ i ] + v4[ i ] + v4[ i ] + v4[ i ] + v4[ i ] + v4[ i ] + v4[ i ] + v4[ i ] + v4[ i ]; + } + } + t3.stop(); + ctm4_ = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v5.size(); ++i) + { + cvr5_[ i ] = v5[ i ] + v5[ i ] + v5[ i ] + v5[ i ] + v5[ i ] + v5[ i ] + v5[ i ] + v5[ i ] + v5[ i ] + v5[ i ]; + } + } + t3.stop(); + ctm5_ = t3.getCPUCycles(); + + t3.reset(); + t3.start(); + for( int i = 0; i < numb; i++ ){ + for( unsigned int i = 0; i < v6.size(); ++i) + { + cvr6_[ i ] = v6[ i ] + v6[ i ] + v6[ i ] + v6[ i ] + v6[ i ] + v6[ i ] + v6[ i ] + v6[ i ] + v6[ i ] + v6[ i ]; + } + } + t3.stop(); + ctm6_ = t3.getCPUCycles(); + + std::cout << std::fixed; + std::cout << std::setprecision(2); + + std::cout << "addition of 2 vectors" << std::endl; + std::cout << "size\t\t" << "10" << "\t\t" << "100" << "\t\t" << "500" << "\t\t" << "1000" << "\t\t" << "2000" << "\t\t" << "5000" << std::endl; + std::cout << "dynamic vectors" << "\t" << dtm1 << "\t" << dtm2 << "\t" << dtm3 << "\t" << dtm4 << "\t" << dtm5 << "\t" << dtm6 << std::endl; + 
std::cout << "overloaded ops." << "\t" << tm1 << "\t" << tm2 << "\t" << tm3 << "\t" << tm4 << "\t" << tm5 << "\t" << tm6 << std::endl; + std::cout << "pure c" << "\t\t" << ctm1 << "\t" << ctm2 << "\t" << ctm3 << "\t" << ctm4 << "\t" << ctm5 << "\t" << ctm6 << "\n" << std::endl; + + std::cout << "addition of 10 vectors" << std::endl; + std::cout << "size\t\t" << "10" << "\t\t" << "100" << "\t\t" << "500" << "\t\t" << "1000" << "\t\t" << "2000" << "\t\t" << "5000" << std::endl; + std::cout << "dynamic vectors" << "\t" << dtm1_ << "\t" << dtm2_ << "\t" << dtm3_ << "\t" << dtm4_ << "\t" << dtm5_ << "\t" << dtm6_ << std::endl; + std::cout << "overloaded ops." << "\t" << tm1_ << "\t" << tm2_ << "\t" << tm3_ << "\t" << tm4_ << "\t" << tm5_ << "\t" << tm6_ << std::endl; + std::cout << "pure c" << "\t\t" << ctm1_ << "\t" << ctm2_ << "\t" << ctm3_ << "\t" << ctm4_ << "\t" << ctm5_ << "\t" << ctm6_ << std::endl; + + return 0; +} -- GitLab From 6abbfef6c5f2b4f7fe9c94fdd91660a3fcbbd0be Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 18 Apr 2019 15:24:32 +0200 Subject: [PATCH 04/93] Moving ET source code to containers. 
--- .../Algorithms}/BinaryExpressionTemplate.h | 13 +- .../ExpressionTemplatesOperations.h | 7 +- .../Algorithms}/ExpressionVariableType.h | 4 + src/TNL/Containers/StaticVector.h | 61 +++++++- .../StaticVectorExpressions.h | 133 ------------------ 5 files changed, 76 insertions(+), 142 deletions(-) rename src/TNL/{Experimental/ExpressionTemplates => Containers/Algorithms}/BinaryExpressionTemplate.h (94%) rename src/TNL/{Experimental/ExpressionTemplates => Containers/Algorithms}/ExpressionTemplatesOperations.h (91%) rename src/TNL/{Experimental/ExpressionTemplates => Containers/Algorithms}/ExpressionVariableType.h (94%) diff --git a/src/TNL/Experimental/ExpressionTemplates/BinaryExpressionTemplate.h b/src/TNL/Containers/Algorithms/BinaryExpressionTemplate.h similarity index 94% rename from src/TNL/Experimental/ExpressionTemplates/BinaryExpressionTemplate.h rename to src/TNL/Containers/Algorithms/BinaryExpressionTemplate.h index 6390f5766..8cd390941 100644 --- a/src/TNL/Experimental/ExpressionTemplates/BinaryExpressionTemplate.h +++ b/src/TNL/Containers/Algorithms/BinaryExpressionTemplate.h @@ -10,11 +10,12 @@ #pragma once -#include -#include +#include +#include namespace TNL { - namespace ExpressionTemplates { + namespace Containers { + namespace Algorithms { template< typename T1, typename T2, @@ -124,6 +125,7 @@ struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVa }; +/* template< typename T1, typename T2 > BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Addition > operator+( const T1 &a, const T2 &b ) { @@ -147,6 +149,7 @@ BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Division > operator/( con { return BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Division >( a, b ); } - - } //namespace ExpressionTemplates + */ + } //namespace Algorithms + } //namespace Containers } // namespace TNL \ No newline at end of file diff --git a/src/TNL/Experimental/ExpressionTemplates/ExpressionTemplatesOperations.h 
b/src/TNL/Containers/Algorithms/ExpressionTemplatesOperations.h similarity index 91% rename from src/TNL/Experimental/ExpressionTemplates/ExpressionTemplatesOperations.h rename to src/TNL/Containers/Algorithms/ExpressionTemplatesOperations.h index e0901a0ca..c1974ddb1 100644 --- a/src/TNL/Experimental/ExpressionTemplates/ExpressionTemplatesOperations.h +++ b/src/TNL/Containers/Algorithms/ExpressionTemplatesOperations.h @@ -11,7 +11,8 @@ #pragma once namespace TNL { - namespace ExpressionTemplates { + namespace Containers { + namespace Algorithms { template< typename T1, typename T2 > struct Addition @@ -49,6 +50,6 @@ struct Division } }; - - } // ExpressionTemplates + } //namespace Algorithms + } // namespace Containers } // namespace TNL \ No newline at end of file diff --git a/src/TNL/Experimental/ExpressionTemplates/ExpressionVariableType.h b/src/TNL/Containers/Algorithms/ExpressionVariableType.h similarity index 94% rename from src/TNL/Experimental/ExpressionTemplates/ExpressionVariableType.h rename to src/TNL/Containers/Algorithms/ExpressionVariableType.h index c353d8dd2..90c49b0e0 100644 --- a/src/TNL/Experimental/ExpressionTemplates/ExpressionVariableType.h +++ b/src/TNL/Containers/Algorithms/ExpressionVariableType.h @@ -13,6 +13,8 @@ #include namespace TNL { + namespace Containers { + namespace Algorithms { enum ExpressionVariableType { ArithmeticVariable, VectorVariable, OtherVariable }; @@ -67,4 +69,6 @@ struct ExpressionVariableTypeGetter< T, false, true > static constexpr ExpressionVariableType value = VectorVariable; }; + } //namespace Algorithms + } //namespace Containers } //namespace TNL diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h index 954e7c553..b3b68ab5c 100644 --- a/src/TNL/Containers/StaticVector.h +++ b/src/TNL/Containers/StaticVector.h @@ -12,7 +12,6 @@ #include #include -#include namespace TNL { namespace Containers { @@ -637,6 +636,66 @@ __cuda_callable__ StaticVector< Size, Real > operator * ( const 
Scalar& c, const StaticVector< Size, Real >& u ); */ +#include + +template< int Size, typename Real, typename ET > +BinaryExpressionTemplate< StaticVector< Size, Real >, ET, ExpressionTemplates::Addition > +operator+( const StaticVector< Size, Real >& a, const ET& b ) +{ + return BinaryExpressionTemplate< StaticVector< Size, Real >, ET, ExpressionTemplates::Addition >( a, b ); +} + +template< typename ET, int Size, typename Real > +BinaryExpressionTemplate< ET, StaticVector< Size, Real >, ExpressionTemplates::Addition > +operator+( const ET& a, const StaticVector< Size, Real >& b ) +{ + return BinaryExpressionTemplate< ET, StaticVector< Size, Real >, ExpressionTemplates::Addition >( a, b ); +} + +template< int Size, typename Real, typename ET > +BinaryExpressionTemplate< StaticVector< Size, Real >, ET, ExpressionTemplates::Subtraction > +operator-( const StaticVector< Size, Real >& a, const ET& b ) +{ + return BinaryExpressionTemplate< StaticVector< Size, Real >, ET, ExpressionTemplates::Subtraction >( a, b ); +} + +template< typename ET, int Size, typename Real > +BinaryExpressionTemplate< ET, StaticVector< Size, Real >, ExpressionTemplates::Subtraction > +operator-( const ET& a, const StaticVector< Size, Real >& b ) +{ + return BinaryExpressionTemplate< ET, StaticVector< Size, Real >, ExpressionTemplates::Subtraction >( a, b ); +} + +template< int Size, typename Real, typename ET > +BinaryExpressionTemplate< StaticVector< Size, Real >, ET, ExpressionTemplates::Multiplication > +operator*( const StaticVector< Size, Real >& a, const ET& b ) +{ + return BinaryExpressionTemplate< StaticVector< Size, Real >, ET, ExpressionTemplates::Multiplication >( a, b ); +} + +template< typename ET, int Size, typename Real > +BinaryExpressionTemplate< ET, StaticVector< Size, Real >, ExpressionTemplates::Multiplication > +operator*( const ET& a, const StaticVector< Size, Real >& b ) +{ + return BinaryExpressionTemplate< ET, StaticVector< Size, Real >, 
ExpressionTemplates::Multiplication >( a, b ); +} + +template< int Size, typename Real, typename ET > +BinaryExpressionTemplate< StaticVector< Size, Real >, ET, ExpressionTemplates::Division > +operator/( const StaticVector< Size, Real >& a, const ET& b ) +{ + return BinaryExpressionTemplate< StaticVector< Size, Real >, ET, ExpressionTemplates::Division >( a, b ); +} + +template< typename ET, int Size, typename Real > +BinaryExpressionTemplate< ET, StaticVector< Size, Real >, ExpressionTemplates::Division > +operator/( const ET& a, const StaticVector< Size, Real >& b ) +{ + return BinaryExpressionTemplate< ET, StaticVector< Size, Real >, ExpressionTemplates::Division >( a, b ); +} + + + template< int Size, typename Real > __cuda_callable__ StaticVector< Size, Real > abs( const StaticVector< Size, Real >& u ) { return u.abs(); }; diff --git a/src/TNL/Experimental/ExpressionTemplates/StaticVectorExpressions.h b/src/TNL/Experimental/ExpressionTemplates/StaticVectorExpressions.h index 136befb99..fbc5cdfc3 100644 --- a/src/TNL/Experimental/ExpressionTemplates/StaticVectorExpressions.h +++ b/src/TNL/Experimental/ExpressionTemplates/StaticVectorExpressions.h @@ -19,139 +19,6 @@ namespace TNL { -template< typename T1, - typename T2, - template< typename, typename > class Operation, - ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value, - ExpressionVariableType T2Type = ExpressionVariableTypeGetter< T2 >::value > -struct BinaryExpressionTemplate -{ - BinaryExpressionTemplate( const T1& a, const T2& b ){}; - - static T1 evaluate( const T1& a, const T2& b ) - { - return Operation< T1, T2 >::evaluate( a, b ); - } -}; - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariable > -{ - using RealType = typename T1::RealType; - using IsExpressionTemplate = bool; - - BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} - - 
static BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) - { - return BinaryExpressionTemplate( a, b ); - } - - RealType operator[]( const int i ) const - { - return Operation< typename T1::RealType, typename T2::RealType >::evaluate( op1[ i ], op2[ i ] ); - } - - int getSize() const - { - return op1.getSize(); - } - - protected: - const T1 &op1; - const T2 &op2; - -}; - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVariable > -{ - using RealType = typename T1::RealType; - using IsExpressionTemplate = bool; - - BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} - - BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) - { - return BinaryExpressionTemplate( a, b ); - } - - RealType operator[]( const int i ) const - { - return Operation< typename T1::RealType, T2 >::evaluate( op1[ i ], op2 ); - } - - int getSize() const - { - return op1.getSize(); - } - - protected: - const T1 &op1; - const T2 &op2; - -}; - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVariable > -{ - using RealType = typename T2::RealType; - using IsExpressionTemplate = bool; - - BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} - - BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) - { - return BinaryExpressionTemplate( a, b ); - } - - RealType operator[]( const int i ) const - { - return Operation< T1, typename T2::RealType >::evaluate( op1, op2[ i ] ); - } - - int getSize() const - { - return op2.getSize(); - } - - protected: - const T1& op1; - const T2& op2; - -}; - - -template< typename T1, typename T2 > -BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Addition > operator+( const T1 &a, const T2 &b ) -{ - return BinaryExpressionTemplate< T1, T2, 
ExpressionTemplates::Addition >( a, b ); -} - -template< typename T1, typename T2 > -BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Subtraction > operator-( const T1 &a, const T2 &b ) -{ - return BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Subtraction >( a, b ); -} - -template< typename T1, typename T2 > -BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Multiplication > operator*( const T1 &a, const T2 &b ) -{ - return BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Multiplication >( a, b ); -} - -template< typename T1, typename T2 > -BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Division > operator/( const T1 &a, const T2 &b ) -{ - return BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Division >( a, b ); -} - template< typename T1 > -- GitLab From 8e50a7781e6e8a027d075df28eb6d6981bc9dc6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 19 Apr 2019 20:00:28 +0200 Subject: [PATCH 05/93] [WIP] Implementing expression templates for static vectors. 
--- .../Algorithms/BinaryExpressionTemplate.h | 155 ------ .../Expressions/BinaryExpressionTemplate.h | 443 ++++++++++++++++++ .../ExpressionTemplatesOperations.h | 5 +- .../ExpressionVariableType.h | 13 +- .../Containers/Expressions/StaticComparison.h | 75 +++ src/TNL/Containers/StaticVector.h | 84 +--- src/TNL/Containers/StaticVector1D_impl.h | 17 +- src/TNL/Containers/StaticVector2D_impl.h | 17 +- src/TNL/Containers/StaticVector3D_impl.h | 18 +- src/TNL/Containers/StaticVectorExpressions.h | 262 +++++++++++ src/TNL/Containers/StaticVector_impl.h | 17 +- src/TNL/Containers/Vector.h | 84 +++- src/TNL/Containers/Vector.hpp | 94 +++- .../StaticVectorExpressions.h | 4 +- .../DistributedMeshes/DistributedGrid.hpp | 2 +- src/UnitTests/Containers/VectorTest.h | 53 +++ .../ExpressionTemplatesStaticTest.cpp | 29 +- 17 files changed, 1121 insertions(+), 251 deletions(-) delete mode 100644 src/TNL/Containers/Algorithms/BinaryExpressionTemplate.h create mode 100644 src/TNL/Containers/Expressions/BinaryExpressionTemplate.h rename src/TNL/Containers/{Algorithms => Expressions}/ExpressionTemplatesOperations.h (95%) rename src/TNL/Containers/{Algorithms => Expressions}/ExpressionVariableType.h (85%) create mode 100644 src/TNL/Containers/Expressions/StaticComparison.h create mode 100644 src/TNL/Containers/StaticVectorExpressions.h diff --git a/src/TNL/Containers/Algorithms/BinaryExpressionTemplate.h b/src/TNL/Containers/Algorithms/BinaryExpressionTemplate.h deleted file mode 100644 index 8cd390941..000000000 --- a/src/TNL/Containers/Algorithms/BinaryExpressionTemplate.h +++ /dev/null @@ -1,155 +0,0 @@ -/*************************************************************************** - BinaryExpressionTemplate.h - description - ------------------- - begin : Apr 18, 2019 - copyright : (C) 2019 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma 
once - -#include -#include - -namespace TNL { - namespace Containers { - namespace Algorithms { - -template< typename T1, - typename T2, - template< typename, typename > class Operation, - ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value, - ExpressionVariableType T2Type = ExpressionVariableTypeGetter< T2 >::value > -struct BinaryExpressionTemplate -{ - BinaryExpressionTemplate( const T1& a, const T2& b ){}; - - static T1 evaluate( const T1& a, const T2& b ) - { - return Operation< T1, T2 >::evaluate( a, b ); - } -}; - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariable > -{ - using RealType = typename T1::RealType; - using IsExpressionTemplate = bool; - - BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} - - static BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) - { - return BinaryExpressionTemplate( a, b ); - } - - RealType operator[]( const int i ) const - { - return Operation< typename T1::RealType, typename T2::RealType >::evaluate( op1[ i ], op2[ i ] ); - } - - int getSize() const - { - return op1.getSize(); - } - - protected: - const T1 &op1; - const T2 &op2; - -}; - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVariable > -{ - using RealType = typename T1::RealType; - using IsExpressionTemplate = bool; - - BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} - - BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) - { - return BinaryExpressionTemplate( a, b ); - } - - RealType operator[]( const int i ) const - { - return Operation< typename T1::RealType, T2 >::evaluate( op1[ i ], op2 ); - } - - int getSize() const - { - return op1.getSize(); - } - - protected: - const T1 &op1; - const T2 &op2; - -}; - -template< typename T1, 
- typename T2, - template< typename, typename > class Operation > -struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVariable > -{ - using RealType = typename T2::RealType; - using IsExpressionTemplate = bool; - - BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} - - BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) - { - return BinaryExpressionTemplate( a, b ); - } - - RealType operator[]( const int i ) const - { - return Operation< T1, typename T2::RealType >::evaluate( op1, op2[ i ] ); - } - - int getSize() const - { - return op2.getSize(); - } - - protected: - const T1& op1; - const T2& op2; - -}; - -/* -template< typename T1, typename T2 > -BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Addition > operator+( const T1 &a, const T2 &b ) -{ - return BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Addition >( a, b ); -} - -template< typename T1, typename T2 > -BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Subtraction > operator-( const T1 &a, const T2 &b ) -{ - return BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Subtraction >( a, b ); -} - -template< typename T1, typename T2 > -BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Multiplication > operator*( const T1 &a, const T2 &b ) -{ - return BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Multiplication >( a, b ); -} - -template< typename T1, typename T2 > -BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Division > operator/( const T1 &a, const T2 &b ) -{ - return BinaryExpressionTemplate< T1, T2, ExpressionTemplates::Division >( a, b ); -} - */ - } //namespace Algorithms - } //namespace Containers -} // namespace TNL \ No newline at end of file diff --git a/src/TNL/Containers/Expressions/BinaryExpressionTemplate.h b/src/TNL/Containers/Expressions/BinaryExpressionTemplate.h new file mode 100644 index 000000000..cc5830c79 --- /dev/null +++ b/src/TNL/Containers/Expressions/BinaryExpressionTemplate.h 
@@ -0,0 +1,443 @@ +/*************************************************************************** + BinaryExpressionTemplate.h - description + ------------------- + begin : Apr 18, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include +#include + +namespace TNL { + namespace Containers { + namespace Expressions { + +template< typename T1, + typename T2, + template< typename, typename > class Operation, + ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value, + ExpressionVariableType T2Type = ExpressionVariableTypeGetter< T2 >::value > +struct BinaryExpressionTemplate +{ + /*BinaryExpressionTemplate( const T1& a, const T2& b ){}; + + static T1 evaluate( const T1& a, const T2& b ) + { + return Operation< T1, T2 >::evaluate( a, b ); + }*/ +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariable > +{ + using RealType = typename T1::RealType; + using IsExpressionTemplate = bool; + + BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + + static BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) + { + return BinaryExpressionTemplate( a, b ); + } + + RealType operator[]( const int i ) const + { + return Operation< typename T1::RealType, typename T2::RealType >::evaluate( op1[ i ], op2[ i ] ); + } + + int getSize() const + { + return op1.getSize(); + } + + protected: + const T1 &op1; + const T2 &op2; + +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVariable > +{ + using RealType = typename T1::RealType; + using IsExpressionTemplate = bool; + + BinaryExpressionTemplate( 
const T1& a, const T2& b ): op1( a ), op2( b ){} + + BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) + { + return BinaryExpressionTemplate( a, b ); + } + + RealType operator[]( const int i ) const + { + return Operation< typename T1::RealType, T2 >::evaluate( op1[ i ], op2 ); + } + + int getSize() const + { + return op1.getSize(); + } + + protected: + const T1 &op1; + const T2 &op2; + +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVariable > +{ + using RealType = typename T2::RealType; + using IsExpressionTemplate = bool; + + BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + + BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) + { + return BinaryExpressionTemplate( a, b ); + } + + RealType operator[]( const int i ) const + { + return Operation< T1, typename T2::RealType >::evaluate( op1, op2[ i ] ); + } + + int getSize() const + { + return op2.getSize(); + } + + protected: + const T1& op1; + const T2& op2; +}; + +//// +// Binary expressions addition +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Addition > +operator + ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Addition >( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +const 
Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Addition > +operator + ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Addition >( a, b ); +} + +//// +// Binary expression subtraction +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Subtraction > +operator - ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Subtraction >( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Subtraction > +operator - ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation 
>, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Subtraction >( a, b ); +} + +//// +// Binary expression multiplication +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Multiplication > +operator * ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Multiplication >( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Multiplication > +operator * ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Multiplication >( a, b ); +} + +//// +// Binary expression division +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + 
Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Division > +operator / ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Division >( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Division > +operator / ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Division >( a, b ); +} + +//// +// Comparison operator == +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +bool +operator == ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonEQ( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +bool +operator == ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticComparisonEQ( a, b ); +} + +//// +// Comparison operator != +template< typename L1, + typename 
L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +bool +operator != ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonNE( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +bool +operator != ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticComparisonNE( a, b ); +} + +//// +// Comparison operator < +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +bool +operator < ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonLT( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +bool +operator < ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticComparisonLT( a, b ); +} + +//// +// Comparison operator <= +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +bool +operator <= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonLE( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +bool +operator <= ( const 
Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticComparisonLE( a, b ); +} + +//// +// Comparison operator > +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +bool +operator > ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonGT( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +bool +operator > ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticComparisonGT( a, b ); +} + +//// +// Comparison operator >= +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +bool +operator >= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonGE( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +bool +operator >= ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticComparisonGE( a, b ); +} + +//// +// Output stream +template< typename T1, + typename T2, + template< typename, typename > class Operation > +std::ostream& operator << ( std::ostream& str, const BinaryExpressionTemplate< T1, T2, Operation >& expression ) +{ + str << "[ "; + for( int i = 0; i < 
expression.getSize() - 1; i++ ) + str << expression[ i ] << " "; + str << expression[ expression.getSize() - 1 ] << " ]"; + return str; +} + + } //namespace Expressions + } //namespace Containers +} // namespace TNL diff --git a/src/TNL/Containers/Algorithms/ExpressionTemplatesOperations.h b/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h similarity index 95% rename from src/TNL/Containers/Algorithms/ExpressionTemplatesOperations.h rename to src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h index c1974ddb1..4e10db348 100644 --- a/src/TNL/Containers/Algorithms/ExpressionTemplatesOperations.h +++ b/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h @@ -12,7 +12,7 @@ namespace TNL { namespace Containers { - namespace Algorithms { + namespace Expressions { template< typename T1, typename T2 > struct Addition @@ -49,7 +49,6 @@ struct Division return a / b; } }; - - } //namespace Algorithms + } //namespace Expressions } // namespace Containers } // namespace TNL \ No newline at end of file diff --git a/src/TNL/Containers/Algorithms/ExpressionVariableType.h b/src/TNL/Containers/Expressions/ExpressionVariableType.h similarity index 85% rename from src/TNL/Containers/Algorithms/ExpressionVariableType.h rename to src/TNL/Containers/Expressions/ExpressionVariableType.h index 90c49b0e0..bc3d2b5f2 100644 --- a/src/TNL/Containers/Algorithms/ExpressionVariableType.h +++ b/src/TNL/Containers/Expressions/ExpressionVariableType.h @@ -14,10 +14,15 @@ namespace TNL { namespace Containers { - namespace Algorithms { + +template< int Size, typename Real > +class StaticVector; + + namespace Expressions { enum ExpressionVariableType { ArithmeticVariable, VectorVariable, OtherVariable }; + /** * SFINAE for checking if T has getSize method */ @@ -32,7 +37,7 @@ private: template< typename C > static NoType& test(...); public: - static constexpr bool value = ( sizeof( test< T >(0) ) == sizeof( YesType ) ); + static constexpr bool value = ( sizeof( test< 
typename std::remove_reference< T >::type >(0) ) == sizeof( YesType ) ); }; @@ -44,7 +49,7 @@ struct IsVectorType template< int Size, typename Real > -struct IsVectorType< Containers::StaticVector< Size, Real > > +struct IsVectorType< StaticVector< Size, Real > > { static constexpr bool value = true; }; @@ -69,6 +74,6 @@ struct ExpressionVariableTypeGetter< T, false, true > static constexpr ExpressionVariableType value = VectorVariable; }; - } //namespace Algorithms + } //namespace Expressions } //namespace Containers } //namespace TNL diff --git a/src/TNL/Containers/Expressions/StaticComparison.h b/src/TNL/Containers/Expressions/StaticComparison.h new file mode 100644 index 000000000..c427a95b9 --- /dev/null +++ b/src/TNL/Containers/Expressions/StaticComparison.h @@ -0,0 +1,75 @@ +/*************************************************************************** + StaticComparison.h - description + ------------------- + begin : Apr 19, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include + +namespace TNL { + namespace Containers { + namespace Expressions { + +template< typename T1, + typename T2 > +bool StaticComparisonEQ( const T1& a, const T2& b ) +{ + TNL_ASSERT_EQ( a.getSize(), b.getSize(), "Sizes of expressions to be compared do not fit." ); + for( int i = 0; i < a.getSize(); i++ ) + if( a[ i ] != b[ i ] ) + return false; + return true; +} + +template< typename T1, + typename T2 > +bool StaticComparisonNE( const T1& a, const T2& b ) +{ + return ! StaticComparisonEQ( a, b ); +} + +template< typename T1, + typename T2 > +bool StaticComparisonGT( const T1& a, const T2& b ) +{ + TNL_ASSERT_EQ( a.getSize(), b.getSize(), "Sizes of expressions to be compared do not fit." 
); + for( int i = 0; i < a.getSize(); i++ ) + if( a[ i ] <= b[ i ] ) + return false; + return true; +} + +template< typename T1, + typename T2 > +bool StaticComparisonLE( const T1& a, const T2& b ) +{ + return ! StaticComparisonGT( a, b ); +} + +template< typename T1, + typename T2 > +bool StaticComparisonLT( const T1& a, const T2& b ) +{ + TNL_ASSERT_EQ( a.getSize(), b.getSize(), "Sizes of expressions to be compared do not fit." ); + for( int i = 0; i < a.getSize(); i++ ) + if( a[ i ] >= b[ i ] ) + return false; + return true; +} + +template< typename T1, + typename T2 > +bool StaticComparisonGE( const T1& a, const T2& b ) +{ + return ! StaticComparisonLT( a, b ); +} + + } //namespace Expressions + } // namespace Containers +} // namespace TNL \ No newline at end of file diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h index b3b68ab5c..f754fe91f 100644 --- a/src/TNL/Containers/StaticVector.h +++ b/src/TNL/Containers/StaticVector.h @@ -12,10 +12,13 @@ #include #include +#include namespace TNL { namespace Containers { + + /** * \brief Vector with constant size. * @@ -70,6 +73,11 @@ class StaticVector : public StaticArray< Size, Real > StaticVector( const std::initializer_list< Real > &elems ); + template< typename T1, + typename T2, + template< typename, typename > class Operation > + StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ); + /** * \brief Sets up a new (vector) parameter which means it can have more elements. 
* @@ -279,6 +287,11 @@ class StaticVector< 1, Real > : public StaticArray< 1, Real > StaticVector( const std::initializer_list< Real > &elems ); + template< typename T1, + typename T2, + template< typename, typename > class Operation > + StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ); + bool setup( const Config::ParameterContainer& parameters, const String& prefix = "" ); @@ -410,6 +423,11 @@ class StaticVector< 2, Real > : public StaticArray< 2, Real > StaticVector( const std::initializer_list< Real > &elems ); + template< typename T1, + typename T2, + template< typename, typename > class Operation > + StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ); + bool setup( const Config::ParameterContainer& parameters, const String& prefix = "" ); @@ -542,6 +560,12 @@ class StaticVector< 3, Real > : public StaticArray< 3, Real > StaticVector( const std::initializer_list< Real > &elems ); + template< typename T1, + typename T2, + template< typename, typename > class Operation > + StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ); + + bool setup( const Config::ParameterContainer& parameters, const String& prefix = "" ); @@ -636,66 +660,6 @@ __cuda_callable__ StaticVector< Size, Real > operator * ( const Scalar& c, const StaticVector< Size, Real >& u ); */ -#include - -template< int Size, typename Real, typename ET > -BinaryExpressionTemplate< StaticVector< Size, Real >, ET, ExpressionTemplates::Addition > -operator+( const StaticVector< Size, Real >& a, const ET& b ) -{ - return BinaryExpressionTemplate< StaticVector< Size, Real >, ET, ExpressionTemplates::Addition >( a, b ); -} - -template< typename ET, int Size, typename Real > -BinaryExpressionTemplate< ET, StaticVector< Size, Real >, ExpressionTemplates::Addition > -operator+( const ET& a, const StaticVector< Size, Real >& b ) -{ - return BinaryExpressionTemplate< ET, StaticVector< Size, Real >, 
ExpressionTemplates::Addition >( a, b ); -} - -template< int Size, typename Real, typename ET > -BinaryExpressionTemplate< StaticVector< Size, Real >, ET, ExpressionTemplates::Subtraction > -operator-( const StaticVector< Size, Real >& a, const ET& b ) -{ - return BinaryExpressionTemplate< StaticVector< Size, Real >, ET, ExpressionTemplates::Subtraction >( a, b ); -} - -template< typename ET, int Size, typename Real > -BinaryExpressionTemplate< ET, StaticVector< Size, Real >, ExpressionTemplates::Subtraction > -operator-( const ET& a, const StaticVector< Size, Real >& b ) -{ - return BinaryExpressionTemplate< ET, StaticVector< Size, Real >, ExpressionTemplates::Subtraction >( a, b ); -} - -template< int Size, typename Real, typename ET > -BinaryExpressionTemplate< StaticVector< Size, Real >, ET, ExpressionTemplates::Multiplication > -operator*( const StaticVector< Size, Real >& a, const ET& b ) -{ - return BinaryExpressionTemplate< StaticVector< Size, Real >, ET, ExpressionTemplates::Multiplication >( a, b ); -} - -template< typename ET, int Size, typename Real > -BinaryExpressionTemplate< ET, StaticVector< Size, Real >, ExpressionTemplates::Multiplication > -operator*( const ET& a, const StaticVector< Size, Real >& b ) -{ - return BinaryExpressionTemplate< ET, StaticVector< Size, Real >, ExpressionTemplates::Multiplication >( a, b ); -} - -template< int Size, typename Real, typename ET > -BinaryExpressionTemplate< StaticVector< Size, Real >, ET, ExpressionTemplates::Division > -operator/( const StaticVector< Size, Real >& a, const ET& b ) -{ - return BinaryExpressionTemplate< StaticVector< Size, Real >, ET, ExpressionTemplates::Division >( a, b ); -} - -template< typename ET, int Size, typename Real > -BinaryExpressionTemplate< ET, StaticVector< Size, Real >, ExpressionTemplates::Division > -operator/( const ET& a, const StaticVector< Size, Real >& b ) -{ - return BinaryExpressionTemplate< ET, StaticVector< Size, Real >, ExpressionTemplates::Division >( a, b ); -} 
- - - template< int Size, typename Real > __cuda_callable__ StaticVector< Size, Real > abs( const StaticVector< Size, Real >& u ) { return u.abs(); }; diff --git a/src/TNL/Containers/StaticVector1D_impl.h b/src/TNL/Containers/StaticVector1D_impl.h index 0349bf0e1..e4e3d8efd 100644 --- a/src/TNL/Containers/StaticVector1D_impl.h +++ b/src/TNL/Containers/StaticVector1D_impl.h @@ -11,8 +11,8 @@ #pragma once #include -#include #include +#include namespace TNL { namespace Containers { @@ -51,6 +51,15 @@ StaticVector< 1, Real >::StaticVector( const std::initializer_list< Real > &elem { } +template< typename Real > + template< typename T1, + typename T2, + template< typename, typename > class Operation > +StaticVector< 1, Real >::StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ) +{ + Algorithms::VectorAssignment< StaticVector< 1, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assign( *this, op ); +}; + template< typename Real > bool StaticVector< 1, Real >::setup( const Config::ParameterContainer& parameters, @@ -71,11 +80,11 @@ String StaticVector< 1, Real >::getType() } template< typename Real > - template< typename StaticVector_ > + template< typename RHS > StaticVector< 1, Real >& -StaticVector< 1, Real >::operator =( const StaticVector_& v ) +StaticVector< 1, Real >::operator =( const RHS& rhs ) { - Algorithms::VectorAssignment< StaticVector< 1, Real >, StaticVector_ >::assign( *this, v ); + Algorithms::VectorAssignment< StaticVector< 1, Real >, RHS >::assign( *this, rhs ); return *this; } diff --git a/src/TNL/Containers/StaticVector2D_impl.h b/src/TNL/Containers/StaticVector2D_impl.h index d23caba03..5f8697bf5 100644 --- a/src/TNL/Containers/StaticVector2D_impl.h +++ b/src/TNL/Containers/StaticVector2D_impl.h @@ -11,7 +11,7 @@ #pragma once #include -#include +#include namespace TNL { namespace Containers { @@ -57,6 +57,15 @@ StaticVector< 2, Real >::StaticVector( const std::initializer_list< Real > &elem { } 
+template< typename Real > + template< typename T1, + typename T2, + template< typename, typename > class Operation > +StaticVector< 2, Real >::StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ) +{ + Algorithms::VectorAssignment< StaticVector< 2, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assign( *this, op ); +}; + template< typename Real > bool StaticVector< 2, Real >::setup( const Config::ParameterContainer& parameters, @@ -78,11 +87,11 @@ String StaticVector< 2, Real >::getType() } template< typename Real > - template< typename StaticVector_ > + template< typename RHS > StaticVector< 2, Real >& -StaticVector< 2, Real >::operator =( const StaticVector_& v ) +StaticVector< 2, Real >::operator =( const RHS& rhs ) { - Algorithms::VectorAssignment< StaticVector< 2, Real >, StaticVector_ >::assign( *this, v ); + Algorithms::VectorAssignment< StaticVector< 2, Real >, RHS >::assign( *this, rhs ); return *this; } diff --git a/src/TNL/Containers/StaticVector3D_impl.h b/src/TNL/Containers/StaticVector3D_impl.h index 97a272a97..5bb78d98b 100644 --- a/src/TNL/Containers/StaticVector3D_impl.h +++ b/src/TNL/Containers/StaticVector3D_impl.h @@ -11,7 +11,7 @@ #pragma once #include -#include +#include namespace TNL { namespace Containers { @@ -57,6 +57,16 @@ StaticVector< 3, Real >::StaticVector( const std::initializer_list< Real > &elem { } +template< typename Real > + template< typename T1, + typename T2, + template< typename, typename > class Operation > +StaticVector< 3, Real >::StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ) +{ + Algorithms::VectorAssignment< StaticVector< 3, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assign( *this, op ); +}; + + template< typename Real > bool StaticVector< 3, Real >::setup( const Config::ParameterContainer& parameters, @@ -79,11 +89,11 @@ String StaticVector< 3, Real >::getType() } template< typename Real > - 
template< typename StaticVector_ > + template< typename RHS > StaticVector< 3, Real >& -StaticVector< 3, Real >::operator =(const StaticVector_& v) +StaticVector< 3, Real >::operator =( const RHS& rhs ) { - Algorithms::VectorAssignment< StaticVector< 3, Real >, StaticVector_ >::assign( *this, v ); + Algorithms::VectorAssignment< StaticVector< 3, Real >, RHS >::assign( *this, rhs ); return *this; } diff --git a/src/TNL/Containers/StaticVectorExpressions.h b/src/TNL/Containers/StaticVectorExpressions.h new file mode 100644 index 000000000..2a0ce9f71 --- /dev/null +++ b/src/TNL/Containers/StaticVectorExpressions.h @@ -0,0 +1,262 @@ +/*************************************************************************** + StaticVectorExpressions.h - description + ------------------- + begin : Apr 19, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include + +#include "Expressions/StaticComparison.h" + +namespace TNL { + namespace Containers { + +//// +// Addition +template< int Size, typename Real, typename ET > +const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Addition > +operator+( const StaticVector< Size, Real >& a, const ET& b ) +{ + return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Addition >( a, b ); +} + +template< typename ET, int Size, typename Real > +const Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Addition > +operator+( const ET& a, const StaticVector< Size, Real >& b ) +{ + return Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Addition >( a, b ); +} + +template< int Size, typename Real1, typename Real2 > +const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, 
Expressions::Addition > +operator+( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +{ + return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Addition >( a, b ); +} + +//// +// Subtraction +template< int Size, typename Real, typename ET > +const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Subtraction > +operator-( const StaticVector< Size, Real >& a, const ET& b ) +{ + return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Subtraction >( a, b ); +} + +template< typename ET, int Size, typename Real > +const Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Subtraction > +operator-( const ET& a, const StaticVector< Size, Real >& b ) +{ + return Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Subtraction >( a, b ); +} + +template< int Size, typename Real1, typename Real2 > +const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Subtraction > +operator-( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +{ + return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Subtraction >( a, b ); +} + +//// +// Multiplication +template< int Size, typename Real, typename ET > +const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Multiplication > +operator*( const StaticVector< Size, Real >& a, const ET& b ) +{ + return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Multiplication >( a, b ); +} + +template< typename ET, int Size, typename Real > +const Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Multiplication > +operator*( const ET& a, const StaticVector< Size, Real >& b ) +{ + return 
Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Multiplication >( a, b ); +} + +template< int Size, typename Real1, typename Real2 > +const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Multiplication > +operator*( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +{ + return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Multiplication >( a, b ); +} + +//// +// Division +template< int Size, typename Real, typename ET > +const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Division > +operator/( const StaticVector< Size, Real >& a, const ET& b ) +{ + return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Division >( a, b ); +} + +template< typename ET, int Size, typename Real > +const Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Division > +operator/( const ET& a, const StaticVector< Size, Real >& b ) +{ + return Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Division >( a, b ); +} + +template< int Size, typename Real1, typename Real2 > +const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Division > +operator/( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +{ + return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Division >( a, b ); +} + +//// +// Comparison operations - operator == +template< int Size, typename Real, typename ET > +bool operator==( const StaticVector< Size, Real >& a, const ET& b ) +{ + return Expressions::StaticComparisonEQ( a, b ); +} + +template< typename ET, int Size, typename Real > +bool operator==( const ET& a, const StaticVector< Size, Real >& b ) +{ 
+ return Expressions::StaticComparisonEQ( a, b ); +} + +template< int Size, typename Real1, typename Real2 > +bool operator==( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +{ + return Expressions::StaticComparisonEQ( a, b ); +} + +//// +// Comparison operations - operator != +template< int Size, typename Real, typename ET > +bool operator!=( const StaticVector< Size, Real >& a, const ET& b ) +{ + return Expressions::StaticComparisonNE( a, b ); +} + +template< typename ET, int Size, typename Real > +bool operator!=( const ET& a, const StaticVector< Size, Real >& b ) +{ + return Expressions::StaticComparisonNE( a, b ); +} + +template< int Size, typename Real1, typename Real2 > +bool operator!=( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +{ + return Expressions::StaticComparisonNE( a, b ); +} + +//// +// Comparison operations - operator < +template< int Size, typename Real, typename ET > +bool operator<( const StaticVector< Size, Real >& a, const ET& b ) +{ + return Expressions::StaticComparisonLT( a, b ); +} + +template< typename ET, int Size, typename Real > +bool operator<( const ET& a, const StaticVector< Size, Real >& b ) +{ + return Expressions::StaticComparisonLT( a, b ); +} + +template< int Size, typename Real1, typename Real2 > +bool operator<( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +{ + return Expressions::StaticComparisonLT( a, b ); +} + +//// +// Comparison operations - operator <= +template< int Size, typename Real, typename ET > +bool operator<=( const StaticVector< Size, Real >& a, const ET& b ) +{ + return Expressions::StaticComparisonLE( a, b ); +} + +template< typename ET, int Size, typename Real > +bool operator<=( const ET& a, const StaticVector< Size, Real >& b ) +{ + return Expressions::StaticComparisonLE( a, b ); +} + +template< int Size, typename Real1, typename Real2 > +bool operator<=( const StaticVector< Size, Real1 >& a, const 
StaticVector< Size, Real2 >& b ) +{ + return Expressions::StaticComparisonLE( a, b ); +} + +//// +// Comparison operations - operator > +template< int Size, typename Real, typename ET > +bool operator>( const StaticVector< Size, Real >& a, const ET& b ) +{ + return Expressions::StaticComparisonGT( a, b ); +} + +template< typename ET, int Size, typename Real > +bool operator>( const ET& a, const StaticVector< Size, Real >& b ) +{ + return Expressions::StaticComparisonGT( a, b ); +} + +template< int Size, typename Real1, typename Real2 > +bool operator>( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +{ + return Expressions::StaticComparisonGT( a, b ); +} + +//// +// Comparison operations - operator >= +template< int Size, typename Real, typename ET > +bool operator>=( const StaticVector< Size, Real >& a, const ET& b ) +{ + return Expressions::StaticComparisonGE( a, b ); +} + +template< typename ET, int Size, typename Real > +bool operator>=( const ET& a, const StaticVector< Size, Real >& b ) +{ + return Expressions::StaticComparisonGE( a, b ); +} + +template< int Size, typename Real1, typename Real2 > +bool operator>=( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +{ + return Expressions::StaticComparisonGE( a, b ); +} + +//// +// TODO: Replace this with multiplication when its safe +template< int Size, typename Real, typename ET > +StaticVector< Size, Real > +Scale( const StaticVector< Size, Real >& a, const ET& b ) +{ + StaticVector< Size, Real > result = Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Multiplication >( a, b ); + return result; +} + +template< typename ET, int Size, typename Real > +Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Multiplication > +Scale( const ET& a, const StaticVector< Size, Real >& b ) +{ + StaticVector< Size, Real > result = Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, 
Expressions::Multiplication >( a, b ); + return result; +} + +template< int Size, typename Real1, typename Real2 > +Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Multiplication > +Scale( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +{ + StaticVector< Size, Real1 > result = Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Multiplication >( a, b ); + return result; +} + + + } //namespace Containers +} // namespace TNL diff --git a/src/TNL/Containers/StaticVector_impl.h b/src/TNL/Containers/StaticVector_impl.h index 177573288..f3c19d92f 100644 --- a/src/TNL/Containers/StaticVector_impl.h +++ b/src/TNL/Containers/StaticVector_impl.h @@ -11,7 +11,7 @@ #pragma once #include -#include +#include #include namespace TNL { @@ -51,6 +51,15 @@ StaticVector< Size, Real >::StaticVector( const std::initializer_list< Real > &e { } +template< int Size, typename Real > + template< typename T1, + typename T2, + template< typename, typename > class Operation > +StaticVector< Size, Real >::StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ) +{ + Algorithms::VectorAssignment< StaticVector< Size, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assign( *this, op ); +}; + template< int Size, typename Real > bool StaticVector< Size, Real >::setup( const Config::ParameterContainer& parameters, @@ -73,11 +82,11 @@ String StaticVector< Size, Real >::getType() } template< int Size, typename Real > - template< typename StaticVector_ > + template< typename RHS > StaticVector< Size, Real >& -StaticVector< Size, Real >::operator =( const StaticVector_& v ) +StaticVector< Size, Real >::operator =( const RHS& rhs ) { - Algorithms::VectorAssignment< StaticVector< Size, Real >, StaticVector_ >::assign( *this, v ); + Algorithms::VectorAssignment< StaticVector< Size, Real >, RHS >::assign( *this, 
rhs ); return *this; } diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h index f9e1c68b3..359066a5a 100644 --- a/src/TNL/Containers/Vector.h +++ b/src/TNL/Containers/Vector.h @@ -41,10 +41,90 @@ public: using ViewType = VectorView< Real, Device, Index >; using ConstViewType = VectorView< std::add_const_t< Real >, Device, Index >; - /** Constructors and assignment operators are inherited from the class \ref Array. */ - using Array< Real, Device, Index >::Array; + /** Assignment operators are inherited from the class \ref Array. */ using Array< Real, Device, Index >::operator=; + /** + * \brief Basic constructor. + * + * Constructs an empty vector with zero size. + */ + Vector(); + + /** + * \brief Constructor with vector size. + * + * \param size is number of vector elements. + */ + Vector( const IndexType& size ); + + /** + * \brief Constructor with data pointer and size. + * + * In this case, the Vector just encapsulates the pointer \e data. No + * deallocation is done in destructor. + * + * This behavior of the Vector is deprecated and \ref VectorView should be used + * instead. + * + * \param data Pointer to data. + * \param size Number of vector elements. + */ + Vector( Real* data, + const IndexType& size ); + + /** + * \brief Copy constructor. + * + * \param vector is an vector to be copied. + */ + explicit Vector( const Vector& vector ); + + /** + * \brief Bind constructor . + * + * The constructor does not make a deep copy, but binds to the supplied vector. + * This is also deprecated, \ref VectorView should be used instead. + * + * \param vector is an vector that is to be bound. + * \param begin is the first index which should be bound. + * \param size is number of array elements that should be bound. + */ + Vector( Vector& vector, + const IndexType& begin = 0, + const IndexType& size = 0 ); + + /** + * \brief Move constructor. 
+ * + * @param vector is an vector to be moved + */ + Vector( Vector&& vector ); + + /** + * \brief Initialize the vector from initializer list, i.e. { ... } + * + * @param list Initializer list. + */ + template< typename InReal > + Vector( const std::initializer_list< InReal >& list ); + + /** + * \brief Initialize the vector from std::list. + * + * @param list Input STL list. + */ + template< typename InReal > + Vector( const std::list< InReal >& list ); + + /** + * \brief Initialize the vector from std::vector. + * + * @param vector Input STL vector. + */ + template< typename InReal > + Vector( const std::vector< InReal >& vector ); + /** \brief Returns type of vector Real value, Device type and the type of Index. */ static String getType(); diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index 8dea03963..764d24fb1 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -16,6 +16,92 @@ namespace TNL { namespace Containers { +template< typename Real, + typename Device, + typename Index > +Vector< Real, Device, Index >:: +Vector() +{ +} + +template< typename Real, + typename Device, + typename Index > +Vector< Real, Device, Index >:: +Vector( const IndexType& size ) +: Array< Real, Device, Index >( size ) +{ +} + +template< typename Real, + typename Device, + typename Index > +Vector< Real, Device, Index >:: +Vector( Real* data, + const IndexType& size ) +: Array< Real, Device, Index >( data, size ) +{ +} + +template< typename Real, + typename Device, + typename Index > +Vector< Real, Device, Index >:: +Vector( const Vector< Real, Device, Index >& vector ) +: Array< Real, Device, Index >( vector ) +{ +} + +template< typename Real, + typename Device, + typename Index > +Vector< Real, Device, Index >:: +Vector( Vector< Real, Device, Index >& vector, + const IndexType& begin, + const IndexType& size ) +: Array< Real, Device, Index >( vector, begin, size ) +{ +} + +template< typename Real, + typename Device, + 
typename Index > +Vector< Real, Device, Index >:: +Vector( Vector< Real, Device, Index >&& vector ) +: Array< Real, Device, Index >( std::move( vector ) ) +{ +} + +template< typename Real, + typename Device, + typename Index > + template< typename InReal > +Vector< Real, Device, Index >:: +Vector( const std::initializer_list< InReal >& list ) +: Array< Real, Device, Index >( list ) +{ +} + +template< typename Real, + typename Device, + typename Index > + template< typename InReal > +Vector< Real, Device, Index >:: +Vector( const std::list< InReal >& list ) +: Array< Real, Device, Index >( list ) +{ +} + +template< typename Real, + typename Device, + typename Index > + template< typename InReal > +Vector< Real, Device, Index >:: +Vector( const std::vector< InReal >& vector ) +: Array< Real, Device, Index >( vector ) +{ +} + template< typename Real, typename Device, typename Index > @@ -59,19 +145,19 @@ getConstView() const return ConstViewType( this->getData(), this->getSize() ); } -template< typename Value, +template< typename Real, typename Device, typename Index > -Vector< Value, Device, Index >:: +Vector< Real, Device, Index >:: operator ViewType() { return getView(); } -template< typename Value, +template< typename Real, typename Device, typename Index > -Vector< Value, Device, Index >:: +Vector< Real, Device, Index >:: operator ConstViewType() const { return getConstView(); diff --git a/src/TNL/Experimental/ExpressionTemplates/StaticVectorExpressions.h b/src/TNL/Experimental/ExpressionTemplates/StaticVectorExpressions.h index fbc5cdfc3..7dcf11e25 100644 --- a/src/TNL/Experimental/ExpressionTemplates/StaticVectorExpressions.h +++ b/src/TNL/Experimental/ExpressionTemplates/StaticVectorExpressions.h @@ -13,8 +13,8 @@ #pragma once #include -#include -#include +#include +#include namespace TNL { diff --git a/src/TNL/Meshes/DistributedMeshes/DistributedGrid.hpp b/src/TNL/Meshes/DistributedMeshes/DistributedGrid.hpp index a91b8a585..8d96c4e55 100644 --- 
a/src/TNL/Meshes/DistributedMeshes/DistributedGrid.hpp +++ b/src/TNL/Meshes/DistributedMeshes/DistributedGrid.hpp @@ -379,7 +379,7 @@ setupNeighbors() for( int i = 0; i < getNeighborsCount(); i++ ) { auto direction = Directions::template getXYZ< Dimension >( i ); - auto coordinates = this->subdomainCoordinates+direction; + CoordinatesType coordinates = this->subdomainCoordinates+direction; if( this->isThereNeighbor( direction ) ) this->neighbors[ i ] = this->getRankOfProcCoord( coordinates ); else diff --git a/src/UnitTests/Containers/VectorTest.h b/src/UnitTests/Containers/VectorTest.h index c223a7afd..0e704f12e 100644 --- a/src/UnitTests/Containers/VectorTest.h +++ b/src/UnitTests/Containers/VectorTest.h @@ -145,6 +145,59 @@ using VectorTypes = ::testing::Types< TYPED_TEST_SUITE( VectorTest, VectorTypes ); +TYPED_TEST( VectorTest, constructors ) +{ + using VectorType = typename TestFixture::VectorType; + + VectorType u; + EXPECT_EQ( u.getSize(), 0 ); + + VectorType v( 10 ); + EXPECT_EQ( v.getSize(), 10 ); + + if( std::is_same< typename VectorType::DeviceType, Devices::Host >::value ) { + typename VectorType::ValueType data[ 10 ]; + VectorType w( data, 10 ); + EXPECT_EQ( w.getData(), data ); + + VectorType z1( w ); + //EXPECT_EQ( z1.getData(), data ); + EXPECT_EQ( z1.getSize(), 10 ); + + VectorType z2( w, 1 ); + EXPECT_EQ( z2.getData(), data + 1 ); + EXPECT_EQ( z2.getSize(), 9 ); + + VectorType z3( w, 2, 3 ); + EXPECT_EQ( z3.getData(), data + 2 ); + EXPECT_EQ( z3.getSize(), 3 ); + } + + VectorType w( v ); + EXPECT_EQ( w.getSize(), v.getSize() ); + for( int i = 0; i < 10; i++ ) + EXPECT_EQ( v.getElement( i ), w.getElement( i ) ); + v.reset(); + EXPECT_EQ( w.getSize(), 10 ); + + VectorType a1 { 1, 2, 3 }; + EXPECT_EQ( a1.getElement( 0 ), 1 ); + EXPECT_EQ( a1.getElement( 1 ), 2 ); + EXPECT_EQ( a1.getElement( 2 ), 3 ); + + std::list< int > l = { 4, 5, 6 }; + VectorType a2( l ); + EXPECT_EQ( a2.getElement( 0 ), 4 ); + EXPECT_EQ( a2.getElement( 1 ), 5 ); + EXPECT_EQ( 
a2.getElement( 2 ), 6 ); + + std::vector< int > q = { 7, 8, 9 }; + + VectorType a3( q ); + EXPECT_EQ( a3.getElement( 0 ), 7 ); + EXPECT_EQ( a3.getElement( 1 ), 8 ); + EXPECT_EQ( a3.getElement( 2 ), 9 ); +} TYPED_TEST( VectorTest, max ) { diff --git a/src/UnitTests/ExpressionTemplatesStaticTest.cpp b/src/UnitTests/ExpressionTemplatesStaticTest.cpp index 55baf4eca..35088d051 100644 --- a/src/UnitTests/ExpressionTemplatesStaticTest.cpp +++ b/src/UnitTests/ExpressionTemplatesStaticTest.cpp @@ -20,16 +20,33 @@ using namespace TNL; using namespace TNL::Containers; #ifdef HAVE_GTEST +TEST( ExpressionTemplatesStaticTest, TypeTraitsTest ) +{ + using VectorType = StaticVector< 6, double >; + VectorType sv1{ 1, 1.5, 9, 54, 300.4, 6 }; + VectorType sv2{ 1.5, 1.5, 50, 30.4, 8, 600 }; + VectorType svr1; + + using Type1 = decltype( sv1 + 1 ); + using Type2 = decltype( sv1 + sv2 ); + static_assert( Expressions::ExpressionVariableTypeGetter< int >::value == Expressions::ArithmeticVariable ); + static_assert( Expressions::ExpressionVariableTypeGetter< VectorType >::value == Expressions::VectorVariable ); + static_assert( Expressions::IsExpressionTemplate< Type1 >::value == true ); + static_assert( Expressions::IsExpressionTemplate< Type2 >::value == true ); +} + TEST( ExpressionTemplatesStaticTest, Addition ) { - StaticVector< 6, double > sv1{ 1, 1.5, 9, 54, 300.4, 6 }; - StaticVector< 6, double > sv2{ 1.5, 1.5, 50, 30.4, 8, 600 }; - StaticVector< 6, double > svr1{}; + using VectorType = StaticVector< 6, double >; + VectorType sv1{ 1, 1.5, 9, 54, 300.4, 6 }; + VectorType sv2{ 1.5, 1.5, 50, 30.4, 8, 600 }; + VectorType svr1; + svr1 = sv1 + sv2 + sv2 + sv1; for( int i = 0; i < 6; i++){ EXPECT_EQ( svr1[ i ], sv1[ i ] + sv2[ i ] + sv2[ i ] + sv1[ i ] ); } - svr1 = sv1 + 2; + svr1 = sv1 + ( double ) 2; for( int i = 0; i < 6; i++){ EXPECT_EQ( svr1[ i ], sv1[ i ] + 2 ); } @@ -37,6 +54,9 @@ TEST( ExpressionTemplatesStaticTest, Addition ) for( int i = 0; i < 6; i++){ EXPECT_EQ( svr1[ i ], 
sv1[ i ] + 2 ); } + + svr1 = sv1 + sv2 + ( double ) 1; + svr1 = sv1 + sv2 - ( double ) 1; } TEST( ExpressionTemplatesStaticTest, Subtraction ) @@ -193,6 +213,7 @@ TEST( ExpressionTemplatesStaticTest, ArcTangent ) #include "GtestMissingError.h" int main( int argc, char* argv[] ) { + //Test(); #ifdef HAVE_GTEST ::testing::InitGoogleTest( &argc, argv ); return RUN_ALL_TESTS(); -- GitLab From 04aee969170805eea8ec8c8a4d6dd2281d213842 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Sat, 20 Apr 2019 23:05:39 +0200 Subject: [PATCH 06/93] [WIP] Implementing expression templates for static vectors. --- src/TNL/Containers/StaticVector.h | 43 ++++++++++++++++++- src/TNL/Containers/Vector.h | 5 ++- src/TNL/Containers/Vector.hpp | 13 +++++- src/TNL/Functions/MeshFunction.h | 2 + src/TNL/Functions/MeshFunction_impl.h | 12 ++++++ src/UnitTests/Containers/StaticVectorTest.cpp | 2 +- .../ExpressionTemplatesStaticTest.cpp | 8 ++-- 7 files changed, 75 insertions(+), 10 deletions(-) diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h index f754fe91f..d8adac6bf 100644 --- a/src/TNL/Containers/StaticVector.h +++ b/src/TNL/Containers/StaticVector.h @@ -677,6 +677,39 @@ namespace TNL { namespace Containers { // TODO: move to some other source file +template< int Size, typename Real1, typename Real2 > +struct StaticScalarProductGetter +{ + static auto compute( const Real1* u, const Real2* v ) -> decltype( u[ 0 ] * v[ 0 ] ) + { + return u[ 0 ] * v[ 0 ] + StaticScalarProductGetter< Size - 1, Real1, Real2 >::compute( &u[ 1 ], &v[ 1 ] ); + } +}; + +template< typename Real1, typename Real2 > +struct StaticScalarProductGetter< 0, Real1, Real2 > +{ + static auto compute( const Real1* u, const Real2* v ) -> decltype( u[ 0 ] * v[ 0 ] ) + { + return u[ 0 ] * v[ 0 ]; + } +}; + +template< int Size, typename Real1, typename Real2 > +auto ScalarProduct( const StaticVector< Size, Real1 >& u, + const StaticVector< Size, Real2 >& v ) -> decltype( u[ 0 ] * v[ 0 ] ) +{ + 
return StaticScalarProductGetter< Size, Real1, Real2 >::compute( u.getData(), v.getData() ); +} + +template< int Size, typename Real1, typename Real2 > +auto operator,( const StaticVector< Size, Real1 >& u, + const StaticVector< Size, Real2 >& v ) -> decltype( u[ 0 ] * v[ 0 ] ) +{ + return StaticScalarProductGetter< Size, Real1, Real2 >::compute( u.getData(), v.getData() ); +} + + template< typename Real > StaticVector< 3, Real > VectorProduct( const StaticVector< 3, Real >& u, const StaticVector< 3, Real >& v ) @@ -688,6 +721,14 @@ StaticVector< 3, Real > VectorProduct( const StaticVector< 3, Real >& u, return p; } +/*template< typename Real > +Real ScalarProduct( const StaticVector< 1, Real >& u, + const StaticVector< 1, Real >& v ) +{ + return u[ 0 ] * v[ 0 ]; +} + + template< typename Real > Real ScalarProduct( const StaticVector< 2, Real >& u, const StaticVector< 2, Real >& v ) @@ -700,7 +741,7 @@ Real ScalarProduct( const StaticVector< 3, Real >& u, const StaticVector< 3, Real >& v ) { return u[ 0 ] * v[ 0 ] + u[ 1 ] * v[ 1 ] + u[ 2 ] * v[ 2 ]; -} +}*/ template< typename T1, typename T2> diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h index 359066a5a..30f959452 100644 --- a/src/TNL/Containers/Vector.h +++ b/src/TNL/Containers/Vector.h @@ -106,8 +106,7 @@ public: * * @param list Initializer list. */ - template< typename InReal > - Vector( const std::initializer_list< InReal >& list ); + Vector( const std::initializer_list< Real >& list ); /** * \brief Initialize the vector from std::list. @@ -170,6 +169,8 @@ public: const RealType& value, const Scalar thisElementMultiplicator ); + Vector& operator=( const Vector& v ); + /** * \brief This function subtracts \e vector from this vector and returns the resulting vector. 
* diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index 764d24fb1..f39d537c1 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -75,9 +75,8 @@ Vector( Vector< Real, Device, Index >&& vector ) template< typename Real, typename Device, typename Index > - template< typename InReal > Vector< Real, Device, Index >:: -Vector( const std::initializer_list< InReal >& list ) +Vector( const std::initializer_list< Real >& list ) : Array< Real, Device, Index >( list ) { } @@ -187,6 +186,16 @@ addElement( const IndexType i, Algorithms::VectorOperations< Device >::addElement( *this, i, value, thisElementMultiplicator ); } +template< typename Real, + typename Device, + typename Index > +Vector< Real, Device, Index >& +Vector< Real, Device, Index >::operator=( const Vector< Real, Device, Index >& v ) +{ + Array< Real, Device, Index >::operator = ( v ); + return *this; +} + template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Functions/MeshFunction.h b/src/TNL/Functions/MeshFunction.h index c9eb66ac5..c7cf168d9 100644 --- a/src/TNL/Functions/MeshFunction.h +++ b/src/TNL/Functions/MeshFunction.h @@ -133,6 +133,8 @@ class MeshFunction : __cuda_callable__ const RealType& operator[]( const IndexType& meshEntityIndex ) const; + ThisType& operator = ( const ThisType& f ); + template< typename Function > MeshFunction& operator = ( const Function& f ); diff --git a/src/TNL/Functions/MeshFunction_impl.h b/src/TNL/Functions/MeshFunction_impl.h index 0ac98b7b2..8f12a2b1c 100644 --- a/src/TNL/Functions/MeshFunction_impl.h +++ b/src/TNL/Functions/MeshFunction_impl.h @@ -400,6 +400,18 @@ operator[]( const IndexType& meshEntityIndex ) const return this->data[ meshEntityIndex ]; } +template< typename Mesh, + int MeshEntityDimension, + typename Real > +MeshFunction< Mesh, MeshEntityDimension, Real >& +MeshFunction< Mesh, MeshEntityDimension, Real >:: +operator = ( const ThisType& f ) +{ + this->setMesh( 
f.getMeshPointer() ); + this->getData() = f.getData(); + return *this; +} + template< typename Mesh, int MeshEntityDimension, typename Real > diff --git a/src/UnitTests/Containers/StaticVectorTest.cpp b/src/UnitTests/Containers/StaticVectorTest.cpp index 3f39c5045..d86b7644a 100644 --- a/src/UnitTests/Containers/StaticVectorTest.cpp +++ b/src/UnitTests/Containers/StaticVectorTest.cpp @@ -120,7 +120,7 @@ TYPED_TEST( StaticVectorTest, operators ) EXPECT_EQ( u3[ 0 ], 4 ); EXPECT_EQ( u3[ size - 1 ], 4 ); - EXPECT_EQ( u1 * u2, 4 * size ); + EXPECT_EQ( ScalarProduct( u1, u2 ), 4 * size ); } TYPED_TEST( StaticVectorTest, comparisons ) diff --git a/src/UnitTests/ExpressionTemplatesStaticTest.cpp b/src/UnitTests/ExpressionTemplatesStaticTest.cpp index 35088d051..951258974 100644 --- a/src/UnitTests/ExpressionTemplatesStaticTest.cpp +++ b/src/UnitTests/ExpressionTemplatesStaticTest.cpp @@ -29,10 +29,10 @@ TEST( ExpressionTemplatesStaticTest, TypeTraitsTest ) using Type1 = decltype( sv1 + 1 ); using Type2 = decltype( sv1 + sv2 ); - static_assert( Expressions::ExpressionVariableTypeGetter< int >::value == Expressions::ArithmeticVariable ); - static_assert( Expressions::ExpressionVariableTypeGetter< VectorType >::value == Expressions::VectorVariable ); - static_assert( Expressions::IsExpressionTemplate< Type1 >::value == true ); - static_assert( Expressions::IsExpressionTemplate< Type2 >::value == true ); + static_assert( Expressions::ExpressionVariableTypeGetter< int >::value == Expressions::ArithmeticVariable, "" ); + static_assert( Expressions::ExpressionVariableTypeGetter< VectorType >::value == Expressions::VectorVariable, "" ); + static_assert( Expressions::IsExpressionTemplate< Type1 >::value == true, "" ); + static_assert( Expressions::IsExpressionTemplate< Type2 >::value == true, "" ); } TEST( ExpressionTemplatesStaticTest, Addition ) -- GitLab From c9b59e39cf8a95ce38988da5a8fa91b4ca3106dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: 
Mon, 22 Apr 2019 20:42:21 +0200 Subject: [PATCH 07/93] Fixed scalar product of static vectors. --- src/TNL/Containers/StaticVector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h index d8adac6bf..8b20830c1 100644 --- a/src/TNL/Containers/StaticVector.h +++ b/src/TNL/Containers/StaticVector.h @@ -687,7 +687,7 @@ struct StaticScalarProductGetter }; template< typename Real1, typename Real2 > -struct StaticScalarProductGetter< 0, Real1, Real2 > +struct StaticScalarProductGetter< 1, Real1, Real2 > { static auto compute( const Real1* u, const Real2* v ) -> decltype( u[ 0 ] * v[ 0 ] ) { -- GitLab From d0d436184f8edb97f71a3ac48880ad7ff110014b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Wed, 24 Apr 2019 06:13:01 +0200 Subject: [PATCH 08/93] Added cuda callables to statice expression templates. --- .../Expressions/BinaryExpressionTemplate.h | 38 ++++++++++++++++--- .../ExpressionTemplatesOperations.h | 4 ++ .../Containers/Expressions/StaticComparison.h | 6 +++ src/TNL/Containers/StaticVector.h | 9 ++++- src/TNL/Containers/StaticVector1D_impl.h | 1 + src/TNL/Containers/StaticVector2D_impl.h | 1 + src/TNL/Containers/StaticVector3D_impl.h | 1 + src/TNL/Containers/StaticVectorExpressions.h | 33 ++++++++++++++++ 8 files changed, 86 insertions(+), 7 deletions(-) diff --git a/src/TNL/Containers/Expressions/BinaryExpressionTemplate.h b/src/TNL/Containers/Expressions/BinaryExpressionTemplate.h index cc5830c79..5a68130bd 100644 --- a/src/TNL/Containers/Expressions/BinaryExpressionTemplate.h +++ b/src/TNL/Containers/Expressions/BinaryExpressionTemplate.h @@ -26,12 +26,6 @@ template< typename T1, ExpressionVariableType T2Type = ExpressionVariableTypeGetter< T2 >::value > struct BinaryExpressionTemplate { - /*BinaryExpressionTemplate( const T1& a, const T2& b ){}; - - static T1 evaluate( const T1& a, const T2& b ) - { - return Operation< T1, T2 >::evaluate( a, b ); - 
}*/ }; template< typename T1, @@ -42,18 +36,22 @@ struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariab using RealType = typename T1::RealType; using IsExpressionTemplate = bool; + __cuda_callable__ BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + __cuda_callable__ static BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) { return BinaryExpressionTemplate( a, b ); } + __cuda_callable__ RealType operator[]( const int i ) const { return Operation< typename T1::RealType, typename T2::RealType >::evaluate( op1[ i ], op2[ i ] ); } + __cuda_callable__ int getSize() const { return op1.getSize(); @@ -73,18 +71,22 @@ struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVa using RealType = typename T1::RealType; using IsExpressionTemplate = bool; + __cuda_callable__ BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + __cuda_callable__ BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) { return BinaryExpressionTemplate( a, b ); } + __cuda_callable__ RealType operator[]( const int i ) const { return Operation< typename T1::RealType, T2 >::evaluate( op1[ i ], op2 ); } + __cuda_callable__ int getSize() const { return op1.getSize(); @@ -104,18 +106,22 @@ struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVa using RealType = typename T2::RealType; using IsExpressionTemplate = bool; + __cuda_callable__ BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + __cuda_callable__ BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) { return BinaryExpressionTemplate( a, b ); } + __cuda_callable__ RealType operator[]( const int i ) const { return Operation< T1, typename T2::RealType >::evaluate( op1, op2[ i ] ); } + __cuda_callable__ int getSize() const { return op2.getSize(); @@ -134,6 +140,7 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > 
+__cuda_callable__ const Expressions::BinaryExpressionTemplate< Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, @@ -150,6 +157,7 @@ operator + ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& template< typename T1, typename T2, template< typename, typename > class Operation > +__cuda_callable__ const Expressions::BinaryExpressionTemplate< Expressions::BinaryExpressionTemplate< T1, T2, Operation >, typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, @@ -171,6 +179,7 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > +__cuda_callable__ const Expressions::BinaryExpressionTemplate< Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, @@ -187,6 +196,7 @@ operator - ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& template< typename T1, typename T2, template< typename, typename > class Operation > +__cuda_callable__ const Expressions::BinaryExpressionTemplate< Expressions::BinaryExpressionTemplate< T1, T2, Operation >, typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, @@ -208,6 +218,7 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > +__cuda_callable__ const Expressions::BinaryExpressionTemplate< Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, @@ -224,6 +235,7 @@ operator * ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& template< typename T1, typename T2, template< typename, typename > class Operation > +__cuda_callable__ const Expressions::BinaryExpressionTemplate< Expressions::BinaryExpressionTemplate< T1, T2, Operation >, typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, @@ -245,6 +257,7 @@ template< typename 
L1, typename R1, typename R2, template< typename, typename > class ROperation > +__cuda_callable__ const Expressions::BinaryExpressionTemplate< Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, @@ -261,6 +274,7 @@ operator / ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& template< typename T1, typename T2, template< typename, typename > class Operation > +__cuda_callable__ const Expressions::BinaryExpressionTemplate< Expressions::BinaryExpressionTemplate< T1, T2, Operation >, typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, @@ -282,6 +296,7 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > +__cuda_callable__ bool operator == ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) @@ -292,6 +307,7 @@ operator == ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& template< typename T1, typename T2, template< typename, typename > class Operation > +__cuda_callable__ bool operator == ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) @@ -307,6 +323,7 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > +__cuda_callable__ bool operator != ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) @@ -317,6 +334,7 @@ operator != ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& template< typename T1, typename T2, template< typename, typename > class Operation > +__cuda_callable__ bool operator != ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation 
>::RealType& b ) @@ -332,6 +350,7 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > +__cuda_callable__ bool operator < ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) @@ -342,6 +361,7 @@ operator < ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& template< typename T1, typename T2, template< typename, typename > class Operation > +__cuda_callable__ bool operator < ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) @@ -357,6 +377,7 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > +__cuda_callable__ bool operator <= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) @@ -367,6 +388,7 @@ operator <= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& template< typename T1, typename T2, template< typename, typename > class Operation > +__cuda_callable__ bool operator <= ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) @@ -382,6 +404,7 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > +__cuda_callable__ bool operator > ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) @@ -392,6 +415,7 @@ operator > ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& template< typename T1, typename T2, template< typename, typename > class Operation > +__cuda_callable__ bool operator > ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, const typename 
Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) @@ -407,6 +431,7 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > +__cuda_callable__ bool operator >= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) @@ -417,6 +442,7 @@ operator >= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& template< typename T1, typename T2, template< typename, typename > class Operation > +__cuda_callable__ bool operator >= ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) diff --git a/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h b/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h index 4e10db348..703a04180 100644 --- a/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h +++ b/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h @@ -17,6 +17,7 @@ namespace TNL { template< typename T1, typename T2 > struct Addition { + __cuda_callable__ static auto evaluate( const T1& a, const T2& b ) -> decltype( a + b ) { return a + b; @@ -26,6 +27,7 @@ struct Addition template< typename T1, typename T2 > struct Subtraction { + __cuda_callable__ static auto evaluate( const T1& a, const T2& b ) -> decltype( a - b ) { return a - b; @@ -35,6 +37,7 @@ struct Subtraction template< typename T1, typename T2 > struct Multiplication { + __cuda_callable__ static auto evaluate( const T1& a, const T2& b ) -> decltype( a * b ) { return a * b; @@ -44,6 +47,7 @@ struct Multiplication template< typename T1, typename T2 > struct Division { + __cuda_callable__ static auto evaluate( const T1& a, const T2& b ) -> decltype( a / b ) { return a / b; diff --git a/src/TNL/Containers/Expressions/StaticComparison.h b/src/TNL/Containers/Expressions/StaticComparison.h index 
c427a95b9..3f254fbd0 100644 --- a/src/TNL/Containers/Expressions/StaticComparison.h +++ b/src/TNL/Containers/Expressions/StaticComparison.h @@ -18,6 +18,7 @@ namespace TNL { template< typename T1, typename T2 > +__cuda_callable__ bool StaticComparisonEQ( const T1& a, const T2& b ) { TNL_ASSERT_EQ( a.getSize(), b.getSize(), "Sizes of expressions to be compared do not fit." ); @@ -29,6 +30,7 @@ bool StaticComparisonEQ( const T1& a, const T2& b ) template< typename T1, typename T2 > +__cuda_callable__ bool StaticComparisonNE( const T1& a, const T2& b ) { return ! StaticComparisonEQ( a, b ); @@ -36,6 +38,7 @@ bool StaticComparisonNE( const T1& a, const T2& b ) template< typename T1, typename T2 > +__cuda_callable__ bool StaticComparisonGT( const T1& a, const T2& b ) { TNL_ASSERT_EQ( a.getSize(), b.getSize(), "Sizes of expressions to be compared do not fit." ); @@ -47,6 +50,7 @@ bool StaticComparisonGT( const T1& a, const T2& b ) template< typename T1, typename T2 > +__cuda_callable__ bool StaticComparisonLE( const T1& a, const T2& b ) { return ! StaticComparisonGT( a, b ); @@ -54,6 +58,7 @@ bool StaticComparisonLE( const T1& a, const T2& b ) template< typename T1, typename T2 > +__cuda_callable__ bool StaticComparisonLT( const T1& a, const T2& b ) { TNL_ASSERT_EQ( a.getSize(), b.getSize(), "Sizes of expressions to be compared do not fit." ); @@ -65,6 +70,7 @@ bool StaticComparisonLT( const T1& a, const T2& b ) template< typename T1, typename T2 > +__cuda_callable__ bool StaticComparisonGE( const T1& a, const T2& b ) { return ! 
StaticComparisonLT( a, b ); diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h index 8b20830c1..47809ca41 100644 --- a/src/TNL/Containers/StaticVector.h +++ b/src/TNL/Containers/StaticVector.h @@ -76,6 +76,7 @@ class StaticVector : public StaticArray< Size, Real > template< typename T1, typename T2, template< typename, typename > class Operation > + __cuda_callable__ StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ); /** @@ -290,6 +291,7 @@ class StaticVector< 1, Real > : public StaticArray< 1, Real > template< typename T1, typename T2, template< typename, typename > class Operation > + __cuda_callable__ StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ); bool setup( const Config::ParameterContainer& parameters, @@ -426,6 +428,7 @@ class StaticVector< 2, Real > : public StaticArray< 2, Real > template< typename T1, typename T2, template< typename, typename > class Operation > + __cuda_callable__ StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ); bool setup( const Config::ParameterContainer& parameters, @@ -563,9 +566,9 @@ class StaticVector< 3, Real > : public StaticArray< 3, Real > template< typename T1, typename T2, template< typename, typename > class Operation > + __cuda_callable__ StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ); - bool setup( const Config::ParameterContainer& parameters, const String& prefix = "" ); @@ -680,6 +683,7 @@ namespace Containers { template< int Size, typename Real1, typename Real2 > struct StaticScalarProductGetter { + __cuda_callable__ static auto compute( const Real1* u, const Real2* v ) -> decltype( u[ 0 ] * v[ 0 ] ) { return u[ 0 ] * v[ 0 ] + StaticScalarProductGetter< Size - 1, Real1, Real2 >::compute( &u[ 1 ], &v[ 1 ] ); @@ -689,6 +693,7 @@ struct StaticScalarProductGetter template< typename Real1, typename Real2 > struct StaticScalarProductGetter< 1, 
Real1, Real2 > { + __cuda_callable__ static auto compute( const Real1* u, const Real2* v ) -> decltype( u[ 0 ] * v[ 0 ] ) { return u[ 0 ] * v[ 0 ]; @@ -696,6 +701,7 @@ struct StaticScalarProductGetter< 1, Real1, Real2 > }; template< int Size, typename Real1, typename Real2 > +__cuda_callable__ auto ScalarProduct( const StaticVector< Size, Real1 >& u, const StaticVector< Size, Real2 >& v ) -> decltype( u[ 0 ] * v[ 0 ] ) { @@ -703,6 +709,7 @@ auto ScalarProduct( const StaticVector< Size, Real1 >& u, } template< int Size, typename Real1, typename Real2 > +__cuda_callable__ auto operator,( const StaticVector< Size, Real1 >& u, const StaticVector< Size, Real2 >& v ) -> decltype( u[ 0 ] * v[ 0 ] ) { diff --git a/src/TNL/Containers/StaticVector1D_impl.h b/src/TNL/Containers/StaticVector1D_impl.h index e4e3d8efd..5248d821d 100644 --- a/src/TNL/Containers/StaticVector1D_impl.h +++ b/src/TNL/Containers/StaticVector1D_impl.h @@ -55,6 +55,7 @@ template< typename Real > template< typename T1, typename T2, template< typename, typename > class Operation > +__cuda_callable__ StaticVector< 1, Real >::StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ) { Algorithms::VectorAssignment< StaticVector< 1, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assign( *this, op ); diff --git a/src/TNL/Containers/StaticVector2D_impl.h b/src/TNL/Containers/StaticVector2D_impl.h index 5f8697bf5..bacc94ee6 100644 --- a/src/TNL/Containers/StaticVector2D_impl.h +++ b/src/TNL/Containers/StaticVector2D_impl.h @@ -61,6 +61,7 @@ template< typename Real > template< typename T1, typename T2, template< typename, typename > class Operation > +__cuda_callable__ StaticVector< 2, Real >::StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ) { Algorithms::VectorAssignment< StaticVector< 2, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assign( *this, op ); diff --git 
a/src/TNL/Containers/StaticVector3D_impl.h b/src/TNL/Containers/StaticVector3D_impl.h index 5bb78d98b..8b796c54a 100644 --- a/src/TNL/Containers/StaticVector3D_impl.h +++ b/src/TNL/Containers/StaticVector3D_impl.h @@ -61,6 +61,7 @@ template< typename Real > template< typename T1, typename T2, template< typename, typename > class Operation > +__cuda_callable__ StaticVector< 3, Real >::StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ) { Algorithms::VectorAssignment< StaticVector< 3, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assign( *this, op ); diff --git a/src/TNL/Containers/StaticVectorExpressions.h b/src/TNL/Containers/StaticVectorExpressions.h index 2a0ce9f71..4cb9c7daf 100644 --- a/src/TNL/Containers/StaticVectorExpressions.h +++ b/src/TNL/Containers/StaticVectorExpressions.h @@ -22,6 +22,7 @@ namespace TNL { //// // Addition template< int Size, typename Real, typename ET > +__cuda_callable__ const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Addition > operator+( const StaticVector< Size, Real >& a, const ET& b ) { @@ -29,6 +30,7 @@ operator+( const StaticVector< Size, Real >& a, const ET& b ) } template< typename ET, int Size, typename Real > +__cuda_callable__ const Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Addition > operator+( const ET& a, const StaticVector< Size, Real >& b ) { @@ -36,6 +38,7 @@ operator+( const ET& a, const StaticVector< Size, Real >& b ) } template< int Size, typename Real1, typename Real2 > +__cuda_callable__ const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Addition > operator+( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) { @@ -45,6 +48,7 @@ operator+( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 //// // Subtraction template< int Size, typename Real, typename ET > 
+__cuda_callable__ const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Subtraction > operator-( const StaticVector< Size, Real >& a, const ET& b ) { @@ -52,6 +56,7 @@ operator-( const StaticVector< Size, Real >& a, const ET& b ) } template< typename ET, int Size, typename Real > +__cuda_callable__ const Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Subtraction > operator-( const ET& a, const StaticVector< Size, Real >& b ) { @@ -59,6 +64,7 @@ operator-( const ET& a, const StaticVector< Size, Real >& b ) } template< int Size, typename Real1, typename Real2 > +__cuda_callable__ const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Subtraction > operator-( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) { @@ -68,6 +74,7 @@ operator-( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 //// // Multiplication template< int Size, typename Real, typename ET > +__cuda_callable__ const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Multiplication > operator*( const StaticVector< Size, Real >& a, const ET& b ) { @@ -75,6 +82,7 @@ operator*( const StaticVector< Size, Real >& a, const ET& b ) } template< typename ET, int Size, typename Real > +__cuda_callable__ const Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Multiplication > operator*( const ET& a, const StaticVector< Size, Real >& b ) { @@ -82,6 +90,7 @@ operator*( const ET& a, const StaticVector< Size, Real >& b ) } template< int Size, typename Real1, typename Real2 > +__cuda_callable__ const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Multiplication > operator*( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) { @@ -91,6 +100,7 @@ operator*( const StaticVector< Size, Real1 
>& a, const StaticVector< Size, Real2 //// // Division template< int Size, typename Real, typename ET > +__cuda_callable__ const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Division > operator/( const StaticVector< Size, Real >& a, const ET& b ) { @@ -98,6 +108,7 @@ operator/( const StaticVector< Size, Real >& a, const ET& b ) } template< typename ET, int Size, typename Real > +__cuda_callable__ const Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Division > operator/( const ET& a, const StaticVector< Size, Real >& b ) { @@ -105,6 +116,7 @@ operator/( const ET& a, const StaticVector< Size, Real >& b ) } template< int Size, typename Real1, typename Real2 > +__cuda_callable__ const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Division > operator/( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) { @@ -114,18 +126,21 @@ operator/( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 //// // Comparison operations - operator == template< int Size, typename Real, typename ET > +__cuda_callable__ bool operator==( const StaticVector< Size, Real >& a, const ET& b ) { return Expressions::StaticComparisonEQ( a, b ); } template< typename ET, int Size, typename Real > +__cuda_callable__ bool operator==( const ET& a, const StaticVector< Size, Real >& b ) { return Expressions::StaticComparisonEQ( a, b ); } template< int Size, typename Real1, typename Real2 > +__cuda_callable__ bool operator==( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) { return Expressions::StaticComparisonEQ( a, b ); @@ -134,18 +149,21 @@ bool operator==( const StaticVector< Size, Real1 >& a, const StaticVector< Size, //// // Comparison operations - operator != template< int Size, typename Real, typename ET > +__cuda_callable__ bool operator!=( const StaticVector< Size, Real >& a, const ET& b ) 
{ return Expressions::StaticComparisonNE( a, b ); } template< typename ET, int Size, typename Real > +__cuda_callable__ bool operator!=( const ET& a, const StaticVector< Size, Real >& b ) { return Expressions::StaticComparisonNE( a, b ); } template< int Size, typename Real1, typename Real2 > +__cuda_callable__ bool operator!=( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) { return Expressions::StaticComparisonNE( a, b ); @@ -154,18 +172,21 @@ bool operator!=( const StaticVector< Size, Real1 >& a, const StaticVector< Size, //// // Comparison operations - operator < template< int Size, typename Real, typename ET > +__cuda_callable__ bool operator<( const StaticVector< Size, Real >& a, const ET& b ) { return Expressions::StaticComparisonLT( a, b ); } template< typename ET, int Size, typename Real > +__cuda_callable__ bool operator<( const ET& a, const StaticVector< Size, Real >& b ) { return Expressions::StaticComparisonLT( a, b ); } template< int Size, typename Real1, typename Real2 > +__cuda_callable__ bool operator<( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) { return Expressions::StaticComparisonLT( a, b ); @@ -174,18 +195,21 @@ bool operator<( const StaticVector< Size, Real1 >& a, const StaticVector< Size, //// // Comparison operations - operator <= template< int Size, typename Real, typename ET > +__cuda_callable__ bool operator<=( const StaticVector< Size, Real >& a, const ET& b ) { return Expressions::StaticComparisonLE( a, b ); } template< typename ET, int Size, typename Real > +__cuda_callable__ bool operator<=( const ET& a, const StaticVector< Size, Real >& b ) { return Expressions::StaticComparisonLE( a, b ); } template< int Size, typename Real1, typename Real2 > +__cuda_callable__ bool operator<=( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) { return Expressions::StaticComparisonLE( a, b ); @@ -194,18 +218,21 @@ bool operator<=( const StaticVector< Size, Real1 
>& a, const StaticVector< Size, //// // Comparison operations - operator > template< int Size, typename Real, typename ET > +__cuda_callable__ bool operator>( const StaticVector< Size, Real >& a, const ET& b ) { return Expressions::StaticComparisonGT( a, b ); } template< typename ET, int Size, typename Real > +__cuda_callable__ bool operator>( const ET& a, const StaticVector< Size, Real >& b ) { return Expressions::StaticComparisonGT( a, b ); } template< int Size, typename Real1, typename Real2 > +__cuda_callable__ bool operator>( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) { return Expressions::StaticComparisonGT( a, b ); @@ -214,18 +241,21 @@ bool operator>( const StaticVector< Size, Real1 >& a, const StaticVector< Size, //// // Comparison operations - operator >= template< int Size, typename Real, typename ET > +__cuda_callable__ bool operator>=( const StaticVector< Size, Real >& a, const ET& b ) { return Expressions::StaticComparisonGE( a, b ); } template< typename ET, int Size, typename Real > +__cuda_callable__ bool operator>=( const ET& a, const StaticVector< Size, Real >& b ) { return Expressions::StaticComparisonGE( a, b ); } template< int Size, typename Real1, typename Real2 > +__cuda_callable__ bool operator>=( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) { return Expressions::StaticComparisonGE( a, b ); @@ -234,6 +264,7 @@ bool operator>=( const StaticVector< Size, Real1 >& a, const StaticVector< Size, //// // TODO: Replace this with multiplication when its safe template< int Size, typename Real, typename ET > +__cuda_callable__ StaticVector< Size, Real > Scale( const StaticVector< Size, Real >& a, const ET& b ) { @@ -242,6 +273,7 @@ Scale( const StaticVector< Size, Real >& a, const ET& b ) } template< typename ET, int Size, typename Real > +__cuda_callable__ Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Multiplication > Scale( const ET& a, const 
StaticVector< Size, Real >& b ) { @@ -250,6 +282,7 @@ Scale( const ET& a, const StaticVector< Size, Real >& b ) } template< int Size, typename Real1, typename Real2 > +__cuda_callable__ Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Multiplication > Scale( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) { -- GitLab From f25c3254abfcd5549f7c21d27ccaa31a039923c3 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 24 Apr 2019 15:34:11 +0200 Subject: [PATCH 09/93] Small refactoring of static array. --- src/TNL/Containers/StaticArray.h | 4 +- src/TNL/Containers/StaticVector.h | 42 +++++++++++++++---- src/UnitTests/Containers/StaticVectorTest.cpp | 2 + 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/src/TNL/Containers/StaticArray.h b/src/TNL/Containers/StaticArray.h index 6327b2783..c5b8ec564 100644 --- a/src/TNL/Containers/StaticArray.h +++ b/src/TNL/Containers/StaticArray.h @@ -26,8 +26,8 @@ template< int Size, typename Value > class StaticArray { public: - typedef Value ValueType; - typedef int IndexType; + using ValueType = Value; + using IndexType = int; enum { size = Size }; /** diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h index 47809ca41..011f88a35 100644 --- a/src/TNL/Containers/StaticVector.h +++ b/src/TNL/Containers/StaticVector.h @@ -17,7 +17,7 @@ namespace TNL { namespace Containers { - + /** * \brief Vector with constant size. @@ -29,10 +29,15 @@ template< int Size, typename Real = double > class StaticVector : public StaticArray< Size, Real > { public: - typedef Real RealType; - enum { size = Size }; + using RealType = Real; + using IndexType = int; + using ThisType = StaticVector< Size, Real >; + + constexpr static int size = Size; using StaticArray< Size, Real >::getSize; + using StaticArray< Size, Real >::operator ==; + using StaticArray< Size, Real >::operator !=; /** * \brief Basic constructor. 
@@ -262,10 +267,17 @@ template< typename Real > class StaticVector< 1, Real > : public StaticArray< 1, Real > { public: - typedef Real RealType; - enum { size = 1 }; + using RealType = Real; + using IndexType = int; + using ThisType = StaticVector< 1, Real >; + + constexpr static int size = 1; using StaticArray< 1, Real >::getSize; + using StaticArray< 1, Real >::operator ==; + using StaticArray< 1, Real >::operator !=; + + /** \brief See StaticVector::StaticVector().*/ __cuda_callable__ @@ -390,10 +402,16 @@ template< typename Real > class StaticVector< 2, Real > : public StaticArray< 2, Real > { public: - typedef Real RealType; - enum { size = 2 }; + using RealType = Real; + using IndexType = int; + using ThisType = StaticVector< 2, Real >; + + constexpr static int size = 2; using StaticArray< 2, Real >::getSize; + using StaticArray< 2, Real >::operator ==; + using StaticArray< 2, Real >::operator !=; + /** \brief See StaticVector::StaticVector().*/ __cuda_callable__ @@ -527,10 +545,16 @@ template< typename Real > class StaticVector< 3, Real > : public StaticArray< 3, Real > { public: - typedef Real RealType; - enum { size = 3 }; + using RealType = Real; + using IndexType = int; + using ThisType = StaticVector< 3, Real >; + + constexpr static int size = 3; using StaticArray< 3, Real >::getSize; + using StaticArray< 3, Real >::operator ==; + using StaticArray< 3, Real >::operator !=; + /** \brief See StaticVector::StaticVector().*/ __cuda_callable__ diff --git a/src/UnitTests/Containers/StaticVectorTest.cpp b/src/UnitTests/Containers/StaticVectorTest.cpp index d86b7644a..1c3326e4e 100644 --- a/src/UnitTests/Containers/StaticVectorTest.cpp +++ b/src/UnitTests/Containers/StaticVectorTest.cpp @@ -140,6 +140,8 @@ TYPED_TEST( StaticVectorTest, comparisons ) EXPECT_TRUE( u3 >= u1 ); EXPECT_TRUE( u2 > u1 ); EXPECT_TRUE( u2 >= u1 ); + EXPECT_TRUE( u1 != u4 ); + EXPECT_FALSE( u1 == u2 ); if( size > 2 ) { EXPECT_FALSE( u1 < u4 ); -- GitLab From 
c5dc42667a2b198736cb0be86fa69acefc37c713 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 24 Apr 2019 15:43:14 +0200 Subject: [PATCH 10/93] Adding unary expression template. --- .../Expressions/UnaryExpressionTemplate.h | 22 ++++ src/TNL/Containers/StaticVector.h | 118 ------------------ src/TNL/Containers/StaticVector1D_impl.h | 36 ------ src/TNL/Containers/StaticVector2D_impl.h | 41 ------ src/TNL/Containers/StaticVector3D_impl.h | 45 ------- 5 files changed, 22 insertions(+), 240 deletions(-) create mode 100644 src/TNL/Containers/Expressions/UnaryExpressionTemplate.h diff --git a/src/TNL/Containers/Expressions/UnaryExpressionTemplate.h b/src/TNL/Containers/Expressions/UnaryExpressionTemplate.h new file mode 100644 index 000000000..4203847d6 --- /dev/null +++ b/src/TNL/Containers/Expressions/UnaryExpressionTemplate.h @@ -0,0 +1,22 @@ +/*************************************************************************** + UnaryExpressionTemplate.h - description + ------------------- + begin : Apr 24, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include + +namespace TNL { + namespace Containers { + namespace Expressions { + + + } //namespace Expressions + } //namespace Containers +} // namespace TNL \ No newline at end of file diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h index 011f88a35..ba967ac83 100644 --- a/src/TNL/Containers/StaticVector.h +++ b/src/TNL/Containers/StaticVector.h @@ -141,48 +141,6 @@ class StaticVector : public StaticArray< Size, Real > __cuda_callable__ StaticVector& operator /= ( const Real& c ); -#ifdef UNDEF - /** - * \brief Addition operator. - * - * This function adds static vector \e u to this static vector and returns the resulting static vector. - * The addition is applied to all the vector elements separately. 
- * \param u Reference to another static vector. - */ - __cuda_callable__ - StaticVector operator + ( const StaticVector& u ) const; - - /** - * \brief Subtraction operator. - * - * This function subtracts static vector \e u from this static vector and returns the resulting static vector. - * The subtraction is applied to all the vector elements separately. - * \param u Reference to another static vector. - */ - __cuda_callable__ - StaticVector operator - ( const StaticVector& u ) const; - - /** - * \brief Multiplication by number. - * - * This function multiplies this static vector by \e c and returns the resulting static vector. - * The addition is applied to all the vector elements separately. - * \param c Multiplicator. - */ - __cuda_callable__ - StaticVector operator * ( const Real& c ) const; - - /** - * \brief Computes scalar (dot) product. - * - * An algebraic operation that takes two equal-length vectors and returns a single number. - * - * \param u Reference to another static vector of the same size as this static vector. - */ - __cuda_callable__ - Real operator * ( const StaticVector& u ) const; -#endif - /** * \brief Compares this static vector with static vector \e v. 
* @@ -331,24 +289,6 @@ class StaticVector< 1, Real > : public StaticArray< 1, Real > __cuda_callable__ StaticVector& operator /= ( const Real& c ); -#ifdef UNDEF - /** \brief See StaticVector::operator + ( const StaticVector& u ) const.*/ - __cuda_callable__ - StaticVector operator + ( const StaticVector& u ) const; - - /** \brief See StaticVector::operator - ( const StaticVector& u ) const.*/ - __cuda_callable__ - StaticVector operator - ( const StaticVector& u ) const; - - /** \brief See StaticVector::operator * ( const Real& c ) const.*/ - __cuda_callable__ - StaticVector operator * ( const Real& c ) const; - - /** \brief See StaticVector::operator * ( const StaticVector& u ) const.*/ - __cuda_callable__ - Real operator * ( const StaticVector& u ) const; -#endif - /** \brief See StaticVector::operator <.*/ __cuda_callable__ bool operator < ( const StaticVector& v ) const; @@ -474,24 +414,6 @@ class StaticVector< 2, Real > : public StaticArray< 2, Real > __cuda_callable__ StaticVector& operator /= ( const Real& c ); -#ifdef UNDEF - /** \brief See StaticVector::operator + ( const StaticVector& u ) const.*/ - __cuda_callable__ - StaticVector operator + ( const StaticVector& u ) const; - - /** \brief See StaticVector::operator - ( const StaticVector& u ) const.*/ - __cuda_callable__ - StaticVector operator - ( const StaticVector& u ) const; - - /** \brief See StaticVector::operator * ( const Real& c ) const.*/ - __cuda_callable__ - StaticVector operator * ( const Real& c ) const; - - /** \brief See StaticVector::operator * ( const StaticVector& u ) const.*/ - __cuda_callable__ - Real operator * ( const StaticVector& u ) const; -#endif - /** \brief See StaticVector::operator <.*/ __cuda_callable__ bool operator < ( const StaticVector& v ) const; @@ -618,24 +540,6 @@ class StaticVector< 3, Real > : public StaticArray< 3, Real > __cuda_callable__ StaticVector& operator /= ( const Real& c ); -#ifdef UNDEF - /** \brief See StaticVector::operator + ( const StaticVector& u 
) const.*/ - __cuda_callable__ - StaticVector operator + ( const StaticVector& u ) const; - - /** \brief See StaticVector::operator - ( const StaticVector& u ) const.*/ - __cuda_callable__ - StaticVector operator - ( const StaticVector& u ) const; - - /** \brief See StaticVector::operator * ( const Real& c ) const.*/ - __cuda_callable__ - StaticVector operator * ( const Real& c ) const; - - /** \brief See StaticVector::operator * ( const StaticVector& u ) const.*/ - __cuda_callable__ - Real operator * ( const StaticVector& u ) const; -#endif - /** \brief See StaticVector::operator <.*/ __cuda_callable__ bool operator < ( const StaticVector& v ) const; @@ -752,28 +656,6 @@ StaticVector< 3, Real > VectorProduct( const StaticVector< 3, Real >& u, return p; } -/*template< typename Real > -Real ScalarProduct( const StaticVector< 1, Real >& u, - const StaticVector< 1, Real >& v ) -{ - return u[ 0 ] * v[ 0 ]; -} - - -template< typename Real > -Real ScalarProduct( const StaticVector< 2, Real >& u, - const StaticVector< 2, Real >& v ) -{ - return u[ 0 ] * v[ 0 ] + u[ 1 ] * v[ 1 ]; -} - -template< typename Real > -Real ScalarProduct( const StaticVector< 3, Real >& u, - const StaticVector< 3, Real >& v ) -{ - return u[ 0 ] * v[ 0 ] + u[ 1 ] * v[ 1 ] + u[ 2 ] * v[ 2 ]; -}*/ - template< typename T1, typename T2> StaticVector<1, T1> Scale( const StaticVector< 1, T1 >& u, diff --git a/src/TNL/Containers/StaticVector1D_impl.h b/src/TNL/Containers/StaticVector1D_impl.h index 5248d821d..fd955b16c 100644 --- a/src/TNL/Containers/StaticVector1D_impl.h +++ b/src/TNL/Containers/StaticVector1D_impl.h @@ -121,42 +121,6 @@ StaticVector< 1, Real >& StaticVector< 1, Real >::operator /= ( const Real& c ) return *this; } -#ifdef UNDEF -template< typename Real > -__cuda_callable__ -StaticVector< 1, Real > StaticVector< 1, Real >::operator + ( const StaticVector& u ) const -{ - StaticVector< 1, Real > res; - res[ 0 ] = this->data[ 0 ] + u[ 0 ]; - return res; -} - -template< typename Real > 
-__cuda_callable__ -StaticVector< 1, Real > StaticVector< 1, Real >::operator - ( const StaticVector& u ) const -{ - StaticVector< 1, Real > res; - res[ 0 ] = this->data[ 0 ] - u[ 0 ]; - return res; -} - -template< typename Real > -__cuda_callable__ -StaticVector< 1, Real > StaticVector< 1, Real >::operator * ( const Real& c ) const -{ - StaticVector< 1, Real > res; - res[ 0 ] = c * this->data[ 0 ]; - return res; -} - -template< typename Real > -__cuda_callable__ -Real StaticVector< 1, Real >::operator * ( const StaticVector& u ) const -{ - return this->data[ 0 ] * u[ 0 ]; -} -#endif - template< typename Real > __cuda_callable__ bool StaticVector< 1, Real >::operator < ( const StaticVector& v ) const diff --git a/src/TNL/Containers/StaticVector2D_impl.h b/src/TNL/Containers/StaticVector2D_impl.h index bacc94ee6..57ad0d93b 100644 --- a/src/TNL/Containers/StaticVector2D_impl.h +++ b/src/TNL/Containers/StaticVector2D_impl.h @@ -133,47 +133,6 @@ StaticVector< 2, Real >& StaticVector< 2, Real >::operator /= ( const Real& c ) return *this; } -#ifdef UNDEF -template< typename Real > -__cuda_callable__ -StaticVector< 2, Real > StaticVector< 2, Real >::operator + ( const StaticVector& u ) const -{ - StaticVector< 2, Real > res; - res[ 0 ] = this->data[ 0 ] + u[ 0 ]; - res[ 1 ] = this->data[ 1 ] + u[ 1 ]; - return res; -} - -template< typename Real > -__cuda_callable__ -StaticVector< 2, Real > StaticVector< 2, Real >::operator - ( const StaticVector& u ) const -{ - StaticVector< 2, Real > res; - res[ 0 ] = this->data[ 0 ] - u[ 0 ]; - res[ 1 ] = this->data[ 1 ] - u[ 1 ]; - return res; -} - -template< typename Real > -__cuda_callable__ -StaticVector< 2, Real > StaticVector< 2, Real >::operator * ( const Real& c ) const -{ - StaticVector< 2, Real > res; - res[ 0 ] = c * this->data[ 0 ]; - res[ 1 ] = c * this->data[ 1 ]; - return res; -} - -template< typename Real > -__cuda_callable__ -Real StaticVector< 2, Real >::operator * ( const StaticVector& u ) const -{ - return 
this->data[ 0 ] * u[ 0 ] + - this->data[ 1 ] * u[ 1 ]; -} -#endif - - template< typename Real > __cuda_callable__ bool StaticVector< 2, Real >::operator < ( const StaticVector& v ) const diff --git a/src/TNL/Containers/StaticVector3D_impl.h b/src/TNL/Containers/StaticVector3D_impl.h index 8b796c54a..7e84edf2c 100644 --- a/src/TNL/Containers/StaticVector3D_impl.h +++ b/src/TNL/Containers/StaticVector3D_impl.h @@ -139,51 +139,6 @@ StaticVector< 3, Real >& StaticVector< 3, Real >::operator /= ( const Real& c ) return *this; } -#ifdef UNDEF -template< typename Real > -__cuda_callable__ -StaticVector< 3, Real > StaticVector< 3, Real >::operator + ( const StaticVector& u ) const -{ - StaticVector< 3, Real > res; - res[ 0 ] = this->data[ 0 ] + u[ 0 ]; - res[ 1 ] = this->data[ 1 ] + u[ 1 ]; - res[ 2 ] = this->data[ 2 ] + u[ 2 ]; - return res; -} - -template< typename Real > -__cuda_callable__ -StaticVector< 3, Real > StaticVector< 3, Real >::operator - ( const StaticVector& u ) const -{ - StaticVector< 3, Real > res; - res[ 0 ] = this->data[ 0 ] - u[ 0 ]; - res[ 1 ] = this->data[ 1 ] - u[ 1 ]; - res[ 2 ] = this->data[ 2 ] - u[ 2 ]; - return res; -} - -template< typename Real > -__cuda_callable__ -StaticVector< 3, Real > StaticVector< 3, Real >::operator * ( const Real& c ) const -{ - StaticVector< 3, Real > res; - res[ 0 ] = c * this->data[ 0 ]; - res[ 1 ] = c * this->data[ 1 ]; - res[ 2 ] = c * this->data[ 2 ]; - return res; -} - -template< typename Real > -__cuda_callable__ -Real StaticVector< 3, Real >::operator * ( const StaticVector& u ) const -{ - return this->data[ 0 ] * u[ 0 ] + - this->data[ 1 ] * u[ 1 ] + - this->data[ 2 ] * u[ 2 ]; -} -#endif - - template< typename Real > __cuda_callable__ bool StaticVector< 3, Real >::operator < ( const StaticVector& v ) const -- GitLab From 1324d90c2ca7ebe4f51e7c9a8ffe2f51f339c143 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Wed, 24 Apr 2019 21:31:08 +0200 Subject: [PATCH 11/93] Added min and 
max for static vectors. --- .../ExpressionTemplatesOperations.h | 23 ++++++++ src/TNL/Containers/StaticVectorExpressions.h | 52 +++++++++++++++++++ src/UnitTests/Containers/StaticVectorTest.cpp | 19 +++++++ 3 files changed, 94 insertions(+) diff --git a/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h b/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h index 703a04180..1842b088f 100644 --- a/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h +++ b/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h @@ -10,6 +10,8 @@ #pragma once +#include + namespace TNL { namespace Containers { namespace Expressions { @@ -53,6 +55,27 @@ struct Division return a / b; } }; + +template< typename T1, typename T2 > +struct Min +{ + __cuda_callable__ + static auto evaluate( const T1& a, const T2& b ) -> decltype( TNL::min( a , b ) ) + { + return TNL::min( a, b ); + } +}; + +template< typename T1, typename T2 > +struct Max +{ + __cuda_callable__ + static auto evaluate( const T1& a, const T2& b ) -> decltype( TNL::max( a, b ) ) + { + return TNL::max( a, b ); + } +}; + } //namespace Expressions } // namespace Containers } // namespace TNL \ No newline at end of file diff --git a/src/TNL/Containers/StaticVectorExpressions.h b/src/TNL/Containers/StaticVectorExpressions.h index 4cb9c7daf..85758b478 100644 --- a/src/TNL/Containers/StaticVectorExpressions.h +++ b/src/TNL/Containers/StaticVectorExpressions.h @@ -123,6 +123,58 @@ operator/( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Division >( a, b ); } +//// +// Min +template< int Size, typename Real, typename ET > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Min > +min( const StaticVector< Size, Real >& a, const ET& b ) +{ + return Expressions::BinaryExpressionTemplate< StaticVector< Size, 
Real >, ET, Expressions::Min >( a, b ); +} + +template< typename ET, int Size, typename Real > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Min > +min( const ET& a, const StaticVector< Size, Real >& b ) +{ + return Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Min >( a, b ); +} + +template< int Size, typename Real1, typename Real2 > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Min > +min( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +{ + return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Min >( a, b ); +} + +//// +// Max +template< int Size, typename Real, typename ET > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Max > +max( const StaticVector< Size, Real >& a, const ET& b ) +{ + return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Max >( a, b ); +} + +template< typename ET, int Size, typename Real > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Max > +max( const ET& a, const StaticVector< Size, Real >& b ) +{ + return Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Max >( a, b ); +} + +template< int Size, typename Real1, typename Real2 > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Max > +max( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +{ + return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Max >( a, b ); +} + //// // Comparison operations - operator == template< int Size, typename Real, 
typename ET > diff --git a/src/UnitTests/Containers/StaticVectorTest.cpp b/src/UnitTests/Containers/StaticVectorTest.cpp index 1c3326e4e..23bc7665d 100644 --- a/src/UnitTests/Containers/StaticVectorTest.cpp +++ b/src/UnitTests/Containers/StaticVectorTest.cpp @@ -123,6 +123,25 @@ TYPED_TEST( StaticVectorTest, operators ) EXPECT_EQ( ScalarProduct( u1, u2 ), 4 * size ); } +TYPED_TEST( StaticVectorTest, MinMax ) +{ + using VectorType = typename TestFixture::VectorType; + constexpr int size = VectorType::size; + + VectorType u1( 1 ), u2( 2 ), u3( 3 ), u4, u_min, u_max; + for( int i = 0; i < size; i++ ) + { + u4[ i ] = i; + u_min[ i ] = TNL::min( i, 3 ); + u_max[ i ] = TNL::max( i, 3 ); + } + + EXPECT_TRUE( min( u1, u2 ) == u1 ); + EXPECT_TRUE( max( u1, u2 ) == u2 ); + EXPECT_TRUE( min( u3, u4 ) == u_min ); + EXPECT_TRUE( max( u3, u4 ) == u_max ); +} + TYPED_TEST( StaticVectorTest, comparisons ) { using VectorType = typename TestFixture::VectorType; -- GitLab From f7eea5386f9753b1a91927bb224e6268dbce92ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Wed, 24 Apr 2019 21:46:00 +0200 Subject: [PATCH 12/93] Implementing ET for abs. 
--- .../ExpressionTemplatesOperations.h | 11 +++ .../Expressions/UnaryExpressionTemplate.h | 71 +++++++++++++++++++ src/TNL/Containers/StaticVector.h | 4 +- src/TNL/Containers/StaticVectorExpressions.h | 1 + src/UnitTests/Containers/StaticVectorTest.cpp | 2 +- 5 files changed, 86 insertions(+), 3 deletions(-) diff --git a/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h b/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h index 1842b088f..d9afdab73 100644 --- a/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h +++ b/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h @@ -76,6 +76,17 @@ struct Max } }; +template< typename T1 > +struct Abs +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> decltype( TNL::abs( a ) ) + { + return TNL::abs( a ); + } +}; + + } //namespace Expressions } // namespace Containers } // namespace TNL \ No newline at end of file diff --git a/src/TNL/Containers/Expressions/UnaryExpressionTemplate.h b/src/TNL/Containers/Expressions/UnaryExpressionTemplate.h index 4203847d6..cf606bb10 100644 --- a/src/TNL/Containers/Expressions/UnaryExpressionTemplate.h +++ b/src/TNL/Containers/Expressions/UnaryExpressionTemplate.h @@ -17,6 +17,77 @@ namespace TNL { namespace Expressions { +template< typename T1, + template< typename, typename > class Operation, + ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value > +struct UnaryExpressionTemplate +{ +}; + +template< typename T1, + template< typename, typename > class Operation > +struct UnaryExpressionTemplate< T1, Operation, VectorVariable > +{ + using RealType = typename T1::RealType; + using IsExpressionTemplate = bool; + + __cuda_callable__ + UnaryExpressionTemplate( const T1& a ): operand( a ){} + + __cuda_callable__ + static UnaryExpressionTemplate evaluate( const T1& a ) + { + return UnaryExpressionTemplate( a ); + } + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return Operation< typename 
T1::RealType, typename T2::RealType >::evaluate( operand[ i ] ); + } + + __cuda_callable__ + int getSize() const + { + return operand.getSize(); + } + + protected: + const T1 &operand; +}; + +//// +// Abs +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Abs > +abs( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, ROperation >, + Expressions::Abs >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Abs > +abs( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, L2, ROperation >, + Expressions::Abs >( a ); +} + + + + } //namespace Expressions } //namespace Containers } // namespace TNL \ No newline at end of file diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h index ba967ac83..850f1cf0c 100644 --- a/src/TNL/Containers/StaticVector.h +++ b/src/TNL/Containers/StaticVector.h @@ -591,9 +591,9 @@ __cuda_callable__ StaticVector< Size, Real > operator * ( const Scalar& c, const StaticVector< Size, Real >& u ); */ -template< int Size, typename Real > +/*template< int Size, typename Real > __cuda_callable__ -StaticVector< Size, Real > abs( const StaticVector< Size, Real >& u ) { return u.abs(); }; +StaticVector< Size, Real > abs( const StaticVector< Size, Real >& u ) { return u.abs(); };*/ } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/StaticVectorExpressions.h b/src/TNL/Containers/StaticVectorExpressions.h index 85758b478..b8d89b458 100644 --- 
a/src/TNL/Containers/StaticVectorExpressions.h +++ b/src/TNL/Containers/StaticVectorExpressions.h @@ -11,6 +11,7 @@ #pragma once #include +#include #include #include diff --git a/src/UnitTests/Containers/StaticVectorTest.cpp b/src/UnitTests/Containers/StaticVectorTest.cpp index 23bc7665d..baca520fe 100644 --- a/src/UnitTests/Containers/StaticVectorTest.cpp +++ b/src/UnitTests/Containers/StaticVectorTest.cpp @@ -190,7 +190,7 @@ TYPED_TEST( StaticVectorTest, abs ) // TODO: implement unary minus operator VectorType v = - 1 * u; - EXPECT_EQ( v.abs(), u ); + EXPECT_EQ( abs( v ), u ); } TYPED_TEST( StaticVectorTest, lpNorm ) -- GitLab From ee7e70488d556042df97c65f651d50d4a0177e48 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 25 Apr 2019 17:16:40 +0200 Subject: [PATCH 13/93] [WIP] Implementing unary operations. --- .../Expressions/BinaryExpressionTemplate.h | 469 ------ .../ExpressionTemplatesOperations.h | 192 ++- .../Expressions/StaticExpressionTemplates.h | 1445 +++++++++++++++++ .../Expressions/UnaryExpressionTemplate.h | 93 -- src/TNL/Containers/StaticVector.h | 22 +- src/TNL/Containers/StaticVector1D_impl.h | 9 + src/TNL/Containers/StaticVector2D_impl.h | 9 + src/TNL/Containers/StaticVector3D_impl.h | 9 + src/TNL/Containers/StaticVectorExpressions.h | 49 +- src/TNL/Containers/StaticVector_impl.h | 10 + src/TNL/Math.h | 224 +++ src/UnitTests/Containers/StaticVectorTest.cpp | 23 +- 12 files changed, 1986 insertions(+), 568 deletions(-) delete mode 100644 src/TNL/Containers/Expressions/BinaryExpressionTemplate.h create mode 100644 src/TNL/Containers/Expressions/StaticExpressionTemplates.h delete mode 100644 src/TNL/Containers/Expressions/UnaryExpressionTemplate.h diff --git a/src/TNL/Containers/Expressions/BinaryExpressionTemplate.h b/src/TNL/Containers/Expressions/BinaryExpressionTemplate.h deleted file mode 100644 index 5a68130bd..000000000 --- a/src/TNL/Containers/Expressions/BinaryExpressionTemplate.h +++ /dev/null @@ -1,469 +0,0 @@ 
-/*************************************************************************** - BinaryExpressionTemplate.h - description - ------------------- - begin : Apr 18, 2019 - copyright : (C) 2019 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include -#include -#include -#include - -namespace TNL { - namespace Containers { - namespace Expressions { - -template< typename T1, - typename T2, - template< typename, typename > class Operation, - ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value, - ExpressionVariableType T2Type = ExpressionVariableTypeGetter< T2 >::value > -struct BinaryExpressionTemplate -{ -}; - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariable > -{ - using RealType = typename T1::RealType; - using IsExpressionTemplate = bool; - - __cuda_callable__ - BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} - - __cuda_callable__ - static BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) - { - return BinaryExpressionTemplate( a, b ); - } - - __cuda_callable__ - RealType operator[]( const int i ) const - { - return Operation< typename T1::RealType, typename T2::RealType >::evaluate( op1[ i ], op2[ i ] ); - } - - __cuda_callable__ - int getSize() const - { - return op1.getSize(); - } - - protected: - const T1 &op1; - const T2 &op2; - -}; - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVariable > -{ - using RealType = typename T1::RealType; - using IsExpressionTemplate = bool; - - __cuda_callable__ - BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} - - __cuda_callable__ - 
BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) - { - return BinaryExpressionTemplate( a, b ); - } - - __cuda_callable__ - RealType operator[]( const int i ) const - { - return Operation< typename T1::RealType, T2 >::evaluate( op1[ i ], op2 ); - } - - __cuda_callable__ - int getSize() const - { - return op1.getSize(); - } - - protected: - const T1 &op1; - const T2 &op2; - -}; - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVariable > -{ - using RealType = typename T2::RealType; - using IsExpressionTemplate = bool; - - __cuda_callable__ - BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} - - __cuda_callable__ - BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) - { - return BinaryExpressionTemplate( a, b ); - } - - __cuda_callable__ - RealType operator[]( const int i ) const - { - return Operation< T1, typename T2::RealType >::evaluate( op1, op2[ i ] ); - } - - __cuda_callable__ - int getSize() const - { - return op2.getSize(); - } - - protected: - const T1& op1; - const T2& op2; -}; - -//// -// Binary expressions addition -template< typename L1, - typename L2, - template< typename, typename > class LOperation, - typename R1, - typename R2, - template< typename, typename > class ROperation > -__cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Addition > -operator + ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) -{ - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Addition >( a, b ); -} - -template< typename T1, - 
typename T2, - template< typename, typename > class Operation > -__cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Addition > -operator + ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) -{ - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Addition >( a, b ); -} - -//// -// Binary expression subtraction -template< typename L1, - typename L2, - template< typename, typename > class LOperation, - typename R1, - typename R2, - template< typename, typename > class ROperation > -__cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Subtraction > -operator - ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) -{ - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Subtraction >( a, b ); -} - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -__cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Subtraction > -operator - ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation 
>::RealType& b ) -{ - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Subtraction >( a, b ); -} - -//// -// Binary expression multiplication -template< typename L1, - typename L2, - template< typename, typename > class LOperation, - typename R1, - typename R2, - template< typename, typename > class ROperation > -__cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Multiplication > -operator * ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) -{ - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Multiplication >( a, b ); -} - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -__cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Multiplication > -operator * ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) -{ - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Multiplication >( a, b ); -} - -//// -// Binary expression division -template< typename L1, - typename L2, - template< typename, typename > class LOperation, - typename R1, - typename R2, - template< typename, 
typename > class ROperation > -__cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Division > -operator / ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) -{ - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Division >( a, b ); -} - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -__cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Division > -operator / ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) -{ - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Division >( a, b ); -} - -//// -// Comparison operator == -template< typename L1, - typename L2, - template< typename, typename > class LOperation, - typename R1, - typename R2, - template< typename, typename > class ROperation > -__cuda_callable__ -bool -operator == ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) -{ - return Expressions::StaticComparisonEQ( a, b ); -} - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -__cuda_callable__ -bool -operator == ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - 
const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) -{ - return Expressions::StaticComparisonEQ( a, b ); -} - -//// -// Comparison operator != -template< typename L1, - typename L2, - template< typename, typename > class LOperation, - typename R1, - typename R2, - template< typename, typename > class ROperation > -__cuda_callable__ -bool -operator != ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) -{ - return Expressions::StaticComparisonNE( a, b ); -} - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -__cuda_callable__ -bool -operator != ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) -{ - return Expressions::StaticComparisonNE( a, b ); -} - -//// -// Comparison operator < -template< typename L1, - typename L2, - template< typename, typename > class LOperation, - typename R1, - typename R2, - template< typename, typename > class ROperation > -__cuda_callable__ -bool -operator < ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) -{ - return Expressions::StaticComparisonLT( a, b ); -} - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -__cuda_callable__ -bool -operator < ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) -{ - return Expressions::StaticComparisonLT( a, b ); -} - -//// -// Comparison operator <= -template< typename L1, - typename L2, - template< typename, typename > class LOperation, - typename R1, - typename R2, - template< typename, typename > class ROperation > -__cuda_callable__ -bool -operator <= ( const 
Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) -{ - return Expressions::StaticComparisonLE( a, b ); -} - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -__cuda_callable__ -bool -operator <= ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) -{ - return Expressions::StaticComparisonLE( a, b ); -} - -//// -// Comparison operator > -template< typename L1, - typename L2, - template< typename, typename > class LOperation, - typename R1, - typename R2, - template< typename, typename > class ROperation > -__cuda_callable__ -bool -operator > ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) -{ - return Expressions::StaticComparisonGT( a, b ); -} - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -__cuda_callable__ -bool -operator > ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) -{ - return Expressions::StaticComparisonGT( a, b ); -} - -//// -// Comparison operator >= -template< typename L1, - typename L2, - template< typename, typename > class LOperation, - typename R1, - typename R2, - template< typename, typename > class ROperation > -__cuda_callable__ -bool -operator >= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) -{ - return Expressions::StaticComparisonGE( a, b ); -} - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -__cuda_callable__ -bool -operator >= ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename 
Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) -{ - return Expressions::StaticComparisonGE( a, b ); -} - -//// -// Output stream -template< typename T1, - typename T2, - template< typename, typename > class Operation > -std::ostream& operator << ( std::ostream& str, const BinaryExpressionTemplate< T1, T2, Operation >& expression ) -{ - str << "[ "; - for( int i = 0; i < expression.getSize() - 1; i++ ) - str << expression[ i ] << " "; - str << expression[ expression.getSize() - 1 ] << " ]"; - return str; -} - - } //namespace Expressions - } //namespace Containers -} // namespace TNL diff --git a/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h b/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h index d9afdab73..331197f23 100644 --- a/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h +++ b/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h @@ -76,6 +76,16 @@ struct Max } }; +template< typename T1 > +struct Minus +{ + __cuda_callable__ + static T1 evaluate( const T1& a ) + { + return -a; + } +}; + template< typename T1 > struct Abs { @@ -86,7 +96,187 @@ struct Abs } }; +template< typename T1 > +struct Sin +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> decltype( TNL::sin( a ) ) + { + return TNL::sin( a ); + } +}; + +template< typename T1 > +struct Cos +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> decltype( TNL::cos( a ) ) + { + return TNL::cos( a ); + } +}; + +template< typename T1 > +struct Tan +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> decltype( TNL::tan( a ) ) + { + return TNL::tan( a ); + } +}; + +template< typename T1 > +struct Sqrt +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> decltype( TNL::sqrt( a ) ) + { + return TNL::sqrt( a ); + } +}; + +template< typename T1 > +struct Cbrt +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> decltype( TNL::cbrt( a ) ) + { + return TNL::cbrt( a ); + } +}; + 
+template< typename T1, typename Real = T1 > +struct Pow +{ + __cuda_callable__ + static auto evaluate( const T1& a, const Real& exp ) -> decltype( TNL::pow( a, exp ) ) + { + return TNL::pow( a, exp ); + } +}; + +template< typename T1 > +struct Floor +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> decltype( TNL::floor( a ) ) + { + return TNL::floor( a ); + } +}; + +template< typename T1 > +struct Ceil +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> decltype( TNL::ceil( a ) ) + { + return TNL::ceil( a ); + } +}; + +template< typename T1 > +struct Acos +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> decltype( TNL::acos( a ) ) + { + return TNL::acos( a ); + } +}; + +template< typename T1 > +struct Asin +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> decltype( TNL::asin( a ) ) + { + return TNL::asin( a ); + } +}; + +template< typename T1 > +struct Atan +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> decltype( TNL::atan( a ) ) + { + return TNL::atan( a ); + } +}; + +template< typename T1 > +struct Cosh +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> decltype( TNL::cosh( a ) ) + { + return TNL::cosh( a ); + } +}; + +template< typename T1 > +struct Tanh +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> decltype( TNL::tanh( a ) ) + { + return TNL::tanh( a ); + } +}; + +template< typename T1 > +struct Log +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> decltype( TNL::log( a ) ) + { + return TNL::log( a ); + } +}; + +template< typename T1 > +struct Log10 +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> decltype( TNL::log10( a ) ) + { + return TNL::log10( a ); + } +}; + +template< typename T1 > +struct Log2 +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> decltype( TNL::log2( a ) ) + { + return TNL::log2( a ); + } +}; + +template< typename T1 > +struct Exp +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> 
decltype( TNL::exp( a ) ) + { + return TNL::exp( a ); + } +}; + +template< typename T1 > +struct Sign +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> decltype( TNL::sign( a ) ) + { + return TNL::sign( a ); + } +}; + } //namespace Expressions } // namespace Containers -} // namespace TNL \ No newline at end of file +} // namespace TNL diff --git a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h new file mode 100644 index 000000000..de5585299 --- /dev/null +++ b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h @@ -0,0 +1,1445 @@ +/*************************************************************************** + StaticExpressionTemplates.h - description + ------------------- + begin : Apr 18, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include +#include + +namespace TNL { + namespace Containers { + namespace Expressions { + +template< typename T1, + template< typename > class Operation, + ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value > +struct UnaryExpressionTemplate +{ +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation, + ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value, + ExpressionVariableType T2Type = ExpressionVariableTypeGetter< T2 >::value > +struct BinaryExpressionTemplate +{ +}; + +//// +// Binary expression template +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariable > +{ + using RealType = typename T1::RealType; + using IsExpressionTemplate = bool; + + __cuda_callable__ + BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + 
+ __cuda_callable__ + static BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) + { + return BinaryExpressionTemplate( a, b ); + } + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return Operation< typename T1::RealType, typename T2::RealType >::evaluate( op1[ i ], op2[ i ] ); + } + + __cuda_callable__ + int getSize() const + { + return op1.getSize(); + } + + protected: + const T1 &op1; + const T2 &op2; +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVariable > +{ + using RealType = typename T1::RealType; + using IsExpressionTemplate = bool; + + __cuda_callable__ + BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + + __cuda_callable__ + BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) + { + return BinaryExpressionTemplate( a, b ); + } + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return Operation< typename T1::RealType, T2 >::evaluate( op1[ i ], op2 ); + } + + __cuda_callable__ + int getSize() const + { + return op1.getSize(); + } + + protected: + const T1 &op1; + const T2 &op2; + +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVariable > +{ + using RealType = typename T2::RealType; + using IsExpressionTemplate = bool; + + __cuda_callable__ + BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + + __cuda_callable__ + BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) + { + return BinaryExpressionTemplate( a, b ); + } + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return Operation< T1, typename T2::RealType >::evaluate( op1, op2[ i ] ); + } + + __cuda_callable__ + int getSize() const + { + return op2.getSize(); + } + + protected: + const T1& op1; + const T2& op2; 
+}; + + +//// +// Unary expression template +template< typename T1, + template< typename > class Operation > +struct UnaryExpressionTemplate< T1, Operation, VectorVariable > +{ + using RealType = typename T1::RealType; + using IsExpressionTemplate = bool; + + __cuda_callable__ + UnaryExpressionTemplate( const T1& a ): operand( a ){} + + __cuda_callable__ + static UnaryExpressionTemplate evaluate( const T1& a ) + { + return UnaryExpressionTemplate( a ); + } + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return Operation< typename T1::RealType >::evaluate( operand[ i ] ); + } + + __cuda_callable__ + int getSize() const + { + return operand.getSize(); + } + + protected: + const T1 &operand; +}; + +//// +// Binary expressions addition +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Addition > +operator + ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Addition >( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Addition > +operator + ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, 
Operation >::RealType& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Addition >( a, b ); +} + +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Addition > +operator + ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Addition >( a, b ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::UnaryExpressionTemplate< R1, ROperation >, + Expressions::Addition > +operator + ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::UnaryExpressionTemplate< R1, ROperation >, + Expressions::Addition >( a, b ); +} + +//// +// Binary expression subtraction +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const 
Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Subtraction > +operator - ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Subtraction >( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Subtraction > +operator - ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Subtraction >( a, b ); +} + +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Subtraction > +operator - ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Expressions::BinaryExpressionTemplate< 
R1, R2, ROperation >, + Expressions::Subtraction >( a, b ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::UnaryExpressionTemplate< R1, ROperation >, + Expressions::Subtraction > +operator - ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::UnaryExpressionTemplate< R1, ROperation >, + Expressions::Subtraction >( a, b ); +} + +//// +// Binary expression multiplication +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Multiplication > +operator * ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Multiplication >( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Multiplication > +operator * ( const 
Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Multiplication >( a, b ); +} + +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Multiplication > +operator * ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Multiplication >( a, b ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::UnaryExpressionTemplate< R1, ROperation >, + Expressions::Multiplication > +operator * ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::UnaryExpressionTemplate< R1, ROperation >, + Expressions::Multiplication >( a, b ); +} + +//// +// Binary expression division +template< typename L1, + typename L2, + 
template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Division > +operator / ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Division >( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Division > +operator / ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Division >( a, b ); +} + +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Division > +operator / ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return 
Expressions::BinaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Division >( a, b ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::UnaryExpressionTemplate< R1, ROperation >, + Expressions::Division > +operator / ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::UnaryExpressionTemplate< R1, ROperation >, + Expressions::Division >( a, b ); +} + +//// +// Binary expression min +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Min > +min ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Min >( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename 
Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Min > +min( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Min >( a, b ); +} + +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Min > +min( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Min >( a, b ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::UnaryExpressionTemplate< R1, ROperation >, + Expressions::Min > +min( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::UnaryExpressionTemplate< R1, ROperation >, + Expressions::Min >( a, b ); +} + +//// +// Binary expression max +template< 
typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Max > +max( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Max >( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Max > +max( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Max >( a, b ); +} + +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Max > +max( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return 
Expressions::BinaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Max >( a, b ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::UnaryExpressionTemplate< R1, ROperation >, + Expressions::Max > +max( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) +{ + return Expressions::BinaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::UnaryExpressionTemplate< R1, ROperation >, + Expressions::Max >( a, b ); +} + +//// +// Comparison operator == +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator == ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonEQ( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +bool +operator == ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticComparisonEQ( a, b ); +} + +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator == ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename 
Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonEQ( a, b ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +bool +operator == ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) +{ + return Expressions::StaticComparisonEQ( a, b ); +} + +//// +// Comparison operator != +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator != ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonNE( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +bool +operator != ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticComparisonNE( a, b ); +} + +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator != ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonNE( a, b ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +bool +operator != ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename 
Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) +{ + return Expressions::StaticComparisonNE( a, b ); +} + +//// +// Comparison operator < +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator < ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonLT( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +bool +operator < ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticComparisonLT( a, b ); +} + +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator < ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonLT( a, b ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +bool +operator < ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) +{ + return Expressions::StaticComparisonLT( a, b ); +} + +//// +// Comparison operator <= +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator <= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation 
>& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonLE( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +bool +operator <= ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticComparisonLE( a, b ); +} + +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator <= ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonLE( a, b ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +bool +operator <= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) +{ + return Expressions::StaticComparisonLE( a, b ); +} + +//// +// Comparison operator > +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator > ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonGT( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +bool +operator > ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b 
) +{ + return Expressions::StaticComparisonGT( a, b ); +} + +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator > ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonGT( a, b ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +bool +operator > ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) +{ + return Expressions::StaticComparisonGT( a, b ); +} + +//// +// Comparison operator >= +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator >= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonGE( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +bool +operator >= ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticComparisonGE( a, b ); +} + +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator >= ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return 
Expressions::StaticComparisonGE( a, b ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +bool +operator >= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) +{ + return Expressions::StaticComparisonGE( a, b ); +} + +//// +// Unary operations + + +//// +// Minus +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Minus > +operator -( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Minus >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Abs > +operator -( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Minus >( a ); +} + +//// +// Abs +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Abs > +abs( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Abs >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + 
Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Abs > +abs( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Abs >( a ); +} + +//// +// Sin +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Sin > +sin( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Sin >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Sin > +sin( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Sin >( a ); +} + +//// +// Cos +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Cos > +cos( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Cos >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Cos > +cos( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + 
Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Cos >( a ); +} + +//// +// Tan +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Tan > +tan( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Din >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Tan > +tan( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Tan >( a ); +} + +//// +// Sqrt +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Sqrt > +sqrt( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Sqrt >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Sqrt > +sqrt( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Sqrt >( a ); +} + +//// +// Cbrt +template< typename L1, + typename L2, + template< typename, typename > class LOperation > 
+__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Cbrt > +cbrt( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Cbrt >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Cbrt > +cbrt( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Cbrt >( a ); +} + +//// +// Pow +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Pow > +pow( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Real& exp ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Pow >( a, exp ); +} + +template< typename L1, + template< typename > class LOperation, + typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Pow > +pow( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, const Real& exp ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Pow >( a, exp ); +} + +//// +// Floor +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, 
LOperation >, + Expressions::Sin > +floor( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Floor >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Floor > +floor( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Floor >( a ); +} + +//// +// Ceil +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Ceil > +ceil( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Ceil >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Ceil > +sin( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Ceil >( a ); +} + +//// +// Asin +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Asin > +asin( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + 
Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Asin >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Asin > +asin( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Asin >( a ); +} + +//// +// Acos +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Acos > +cos( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Acos >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Acos > +acos( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Cos >( a ); +} + +//// +// Atan +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Atan > +tan( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Atan >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const 
Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Atan > +atan( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Atan >( a ); +} + +//// +// Sinh +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Sinh > +sinh( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Sinh >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Sinh > +sinh( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Sinh >( a ); +} + +//// +// Cosh +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Cosh > +cosh( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::cosh >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Cosh > +cosh( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ 
+ return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Cosh >( a ); +} + + +//// +// tanh +// log +// log10 +// log2 +// exp + + + + + + + + + + + + + +//// +// Output stream +template< typename T1, + typename T2, + template< typename, typename > class Operation > +std::ostream& operator << ( std::ostream& str, const BinaryExpressionTemplate< T1, T2, Operation >& expression ) +{ + str << "[ "; + for( int i = 0; i < expression.getSize() - 1; i++ ) + str << expression[ i ] << " "; + str << expression[ expression.getSize() - 1 ] << " ]"; + return str; +} + +template< typename T, + template< typename > class Operation > +std::ostream& operator << ( std::ostream& str, const UnaryExpressionTemplate< T, Operation >& expression ) +{ + str << "[ "; + for( int i = 0; i < expression.getSize() - 1; i++ ) + str << expression[ i ] << " "; + str << expression[ expression.getSize() - 1 ] << " ]"; + return str; +} + } //namespace Expressions + } //namespace Containers +} // namespace TNL diff --git a/src/TNL/Containers/Expressions/UnaryExpressionTemplate.h b/src/TNL/Containers/Expressions/UnaryExpressionTemplate.h deleted file mode 100644 index cf606bb10..000000000 --- a/src/TNL/Containers/Expressions/UnaryExpressionTemplate.h +++ /dev/null @@ -1,93 +0,0 @@ -/*************************************************************************** - UnaryExpressionTemplate.h - description - ------------------- - begin : Apr 24, 2019 - copyright : (C) 2019 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include - -namespace TNL { - namespace Containers { - namespace Expressions { - - -template< typename T1, - template< typename, typename > class Operation, - ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value > -struct UnaryExpressionTemplate -{ -}; - 
-template< typename T1, - template< typename, typename > class Operation > -struct UnaryExpressionTemplate< T1, Operation, VectorVariable > -{ - using RealType = typename T1::RealType; - using IsExpressionTemplate = bool; - - __cuda_callable__ - UnaryExpressionTemplate( const T1& a ): operand( a ){} - - __cuda_callable__ - static UnaryExpressionTemplate evaluate( const T1& a ) - { - return UnaryExpressionTemplate( a ); - } - - __cuda_callable__ - RealType operator[]( const int i ) const - { - return Operation< typename T1::RealType, typename T2::RealType >::evaluate( operand[ i ] ); - } - - __cuda_callable__ - int getSize() const - { - return operand.getSize(); - } - - protected: - const T1 &operand; -}; - -//// -// Abs -template< typename L1, - typename L2, - template< typename, typename > class LOperation > -__cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Abs > -abs( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) -{ - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, ROperation >, - Expressions::Abs >( a ); -} - -template< typename L1, - template< typename > class LOperation > -__cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Abs > -abs( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) -{ - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, L2, ROperation >, - Expressions::Abs >( a ); -} - - - - - } //namespace Expressions - } //namespace Containers -} // namespace TNL \ No newline at end of file diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h index 850f1cf0c..1332dc2a8 100644 --- a/src/TNL/Containers/StaticVector.h +++ b/src/TNL/Containers/StaticVector.h @@ -12,7 +12,7 @@ #include #include -#include +#include namespace TNL { 
namespace Containers { @@ -84,6 +84,11 @@ class StaticVector : public StaticArray< Size, Real > __cuda_callable__ StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ); + template< typename T, + template< typename > class Operation > + __cuda_callable__ + StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ); + /** * \brief Sets up a new (vector) parameter which means it can have more elements. * @@ -264,6 +269,11 @@ class StaticVector< 1, Real > : public StaticArray< 1, Real > __cuda_callable__ StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ); + template< typename T, + template< typename > class Operation > + __cuda_callable__ + StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ); + bool setup( const Config::ParameterContainer& parameters, const String& prefix = "" ); @@ -389,6 +399,11 @@ class StaticVector< 2, Real > : public StaticArray< 2, Real > __cuda_callable__ StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ); + template< typename T, + template< typename > class Operation > + __cuda_callable__ + StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ); + bool setup( const Config::ParameterContainer& parameters, const String& prefix = "" ); @@ -515,6 +530,11 @@ class StaticVector< 3, Real > : public StaticArray< 3, Real > __cuda_callable__ StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ); + template< typename T, + template< typename > class Operation > + __cuda_callable__ + StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ); + bool setup( const Config::ParameterContainer& parameters, const String& prefix = "" ); diff --git a/src/TNL/Containers/StaticVector1D_impl.h b/src/TNL/Containers/StaticVector1D_impl.h index fd955b16c..436605285 100644 --- a/src/TNL/Containers/StaticVector1D_impl.h +++ 
b/src/TNL/Containers/StaticVector1D_impl.h @@ -61,6 +61,15 @@ StaticVector< 1, Real >::StaticVector( const Expressions::BinaryExpressionTempla Algorithms::VectorAssignment< StaticVector< 1, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assign( *this, op ); }; +template< typename Real > + template< typename T, + template< typename > class Operation > +__cuda_callable__ +StaticVector< 1, Real >::StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ) +{ + Algorithms::VectorAssignment< StaticVector< 1, Real >, Expressions::UnaryExpressionTemplate< T, Operation > >::assign( *this, op ); +}; + template< typename Real > bool StaticVector< 1, Real >::setup( const Config::ParameterContainer& parameters, diff --git a/src/TNL/Containers/StaticVector2D_impl.h b/src/TNL/Containers/StaticVector2D_impl.h index 57ad0d93b..75e74d675 100644 --- a/src/TNL/Containers/StaticVector2D_impl.h +++ b/src/TNL/Containers/StaticVector2D_impl.h @@ -67,6 +67,15 @@ StaticVector< 2, Real >::StaticVector( const Expressions::BinaryExpressionTempla Algorithms::VectorAssignment< StaticVector< 2, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assign( *this, op ); }; +template< typename Real > + template< typename T, + template< typename > class Operation > +__cuda_callable__ +StaticVector< 2, Real >::StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ) +{ + Algorithms::VectorAssignment< StaticVector< 2, Real >, Expressions::UnaryExpressionTemplate< T, Operation > >::assign( *this, op ); +}; + template< typename Real > bool StaticVector< 2, Real >::setup( const Config::ParameterContainer& parameters, diff --git a/src/TNL/Containers/StaticVector3D_impl.h b/src/TNL/Containers/StaticVector3D_impl.h index 7e84edf2c..1626dce7f 100644 --- a/src/TNL/Containers/StaticVector3D_impl.h +++ b/src/TNL/Containers/StaticVector3D_impl.h @@ -67,6 +67,15 @@ StaticVector< 3, Real >::StaticVector( const 
Expressions::BinaryExpressionTempla Algorithms::VectorAssignment< StaticVector< 3, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assign( *this, op ); }; +template< typename Real > + template< typename T, + template< typename > class Operation > +__cuda_callable__ +StaticVector< 3, Real >::StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ) +{ + Algorithms::VectorAssignment< StaticVector< 3, Real >, Expressions::UnaryExpressionTemplate< T, Operation > >::assign( *this, op ); +}; + template< typename Real > bool diff --git a/src/TNL/Containers/StaticVectorExpressions.h b/src/TNL/Containers/StaticVectorExpressions.h index b8d89b458..7680ab9c7 100644 --- a/src/TNL/Containers/StaticVectorExpressions.h +++ b/src/TNL/Containers/StaticVectorExpressions.h @@ -10,8 +10,7 @@ #pragma once -#include -#include +#include #include #include @@ -314,6 +313,52 @@ bool operator>=( const StaticVector< Size, Real1 >& a, const StaticVector< Size, return Expressions::StaticComparisonGE( a, b ); } +//// +// Minus +template< int Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Minus > +operator -( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Minus >( a ); +} + +//// +// Abs +template< int Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Abs > +abs( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Abs >( a ); +} + + +//// +// sin +// cos +// tan +// sqrt +// cbrt +// pow +// floor +// ceil +// acos +// asin +// atan +// cosh +// tanh +// log +// log10 +// log2 +// exp +// sign + + + + + + //// // TODO: Replace this with multiplication when its safe template< int Size, typename Real, typename ET > diff --git 
a/src/TNL/Containers/StaticVector_impl.h b/src/TNL/Containers/StaticVector_impl.h index f3c19d92f..6437e9dc6 100644 --- a/src/TNL/Containers/StaticVector_impl.h +++ b/src/TNL/Containers/StaticVector_impl.h @@ -60,6 +60,16 @@ StaticVector< Size, Real >::StaticVector( const Expressions::BinaryExpressionTem Algorithms::VectorAssignment< StaticVector< Size, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assign( *this, op ); }; +template< int Size, + typename Real > + template< typename T, + template< typename > class Operation > +__cuda_callable__ +StaticVector< Size, Real >::StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ) +{ + Algorithms::VectorAssignment< StaticVector< Size, Real >, Expressions::UnaryExpressionTemplate< T, Operation > >::assign( *this, op ); +}; + template< int Size, typename Real > bool StaticVector< Size, Real >::setup( const Config::ParameterContainer& parameters, diff --git a/src/TNL/Math.h b/src/TNL/Math.h index dcfa91a34..4c5b0d593 100644 --- a/src/TNL/Math.h +++ b/src/TNL/Math.h @@ -163,6 +163,230 @@ T sqrt( const T& value ) #endif } +/** + * \brief This function returns cubic root of the given \e value. + */ +template< typename T > +__cuda_callable__ inline +T cbrt( const T& value ) +{ +#if defined(__CUDA_ARCH__) || defined(__MIC__) + return ::cbrt( value ); +#else + return std::cbrt( value ); +#endif +} + +/** + * \brief This function returns sine of the given \e value. + */ +template< typename T > +__cuda_callable__ inline +T sin( const T& value ) +{ +#if defined(__CUDA_ARCH__) || defined(__MIC__) + return ::sin( value ); +#else + return std::sin( value ); +#endif +} + +/** + * \brief This function returns cosine of the given \e value. 
+ */ +template< typename T > +__cuda_callable__ inline +T cos( const T& value ) +{ +#if defined(__CUDA_ARCH__) || defined(__MIC__) + return ::cos( value ); +#else + return std::cos( value ); +#endif +} + +/** + * \brief This function returns tangent of the given \e value. + */ +template< typename T > +__cuda_callable__ inline +T tan( const T& value ) +{ +#if defined(__CUDA_ARCH__) || defined(__MIC__) + return ::tan( value ); +#else + return std::tan( value ); +#endif +} + +/** + * \brief This function returns largest integer value not greater than the given \e value. + */ +template< typename T > +__cuda_callable__ inline +T floor( const T& value ) +{ +#if defined(__CUDA_ARCH__) || defined(__MIC__) + return ::floor( value ); +#else + return std::floor( value ); +#endif +} + +/** + * \brief This function returns the smallest integer value not less than the given \e value. + */ +template< typename T > +__cuda_callable__ inline +T ceil( const T& value ) +{ +#if defined(__CUDA_ARCH__) || defined(__MIC__) + return ::ceil( value ); +#else + return std::ceil( value ); +#endif +} + +/** + * \brief This function returns the arc cosine of the given \e value. + */ +template< typename T > +__cuda_callable__ inline +T acos( const T& value ) +{ +#if defined(__CUDA_ARCH__) || defined(__MIC__) + return ::acos( value ); +#else + return std::acos( value ); +#endif +} + +/** + * \brief This function returns the arc sine of the given \e value. + */ +template< typename T > +__cuda_callable__ inline +T asin( const T& value ) +{ +#if defined(__CUDA_ARCH__) || defined(__MIC__) + return ::asin( value ); +#else + return std::asin( value ); +#endif +} + +/** + * \brief This function returns the arc tangent of the given \e value. 
+ */ +template< typename T > +__cuda_callable__ inline +T atan( const T& value ) +{ +#if defined(__CUDA_ARCH__) || defined(__MIC__) + return ::atan( value ); +#else + return std::atan( value ); +#endif +} + +/** + * \brief This function returns the hyperbolic sine of the given \e value. + */ +template< typename T > +__cuda_callable__ inline +T sinh( const T& value ) +{ +#if defined(__CUDA_ARCH__) || defined(__MIC__) + return ::sinh( value ); +#else + return std::sinh( value ); +#endif +} + +/** + * \brief This function returns the hyperbolic cosine of the given \e value. + */ +template< typename T > +__cuda_callable__ inline +T cosh( const T& value ) +{ +#if defined(__CUDA_ARCH__) || defined(__MIC__) + return ::cosh( value ); +#else + return std::cosh( value ); +#endif +} + +/** + * \brief This function returns the hyperbolic tangent of the given \e value. + */ +template< typename T > +__cuda_callable__ inline +T tanh( const T& value ) +{ +#if defined(__CUDA_ARCH__) || defined(__MIC__) + return ::tanh( value ); +#else + return std::tanh( value ); +#endif +} + +/** + * \brief This function returns the natural logarithm of the given \e value. + */ +template< typename T > +__cuda_callable__ inline +T log( const T& value ) +{ +#if defined(__CUDA_ARCH__) || defined(__MIC__) + return ::log( value ); +#else + return std::log( value ); +#endif +} + +/** + * \brief This function returns the common logarithm of the given \e value. + */ +template< typename T > +__cuda_callable__ inline +T log10( const T& value ) +{ +#if defined(__CUDA_ARCH__) || defined(__MIC__) + return ::log10( value ); +#else + return std::log10( value ); +#endif +} + +/** + * \brief This function returns the binary logarithm of the given \e value. 
+ */ +template< typename T > +__cuda_callable__ inline +T log2( const T& value ) +{ +#if defined(__CUDA_ARCH__) || defined(__MIC__) + return ::log2( value ); +#else + return std::log2( value ); +#endif +} + +/** + * \brief This function returns the base-e exponential of the given \e value. + */ +template< typename T > +__cuda_callable__ inline +T exp( const T& value ) +{ +#if defined(__CUDA_ARCH__) || defined(__MIC__) + return ::exp( value ); +#else + return std::exp( value ); +#endif +} + /** * \brief This function swaps values of two parameters. * diff --git a/src/UnitTests/Containers/StaticVectorTest.cpp b/src/UnitTests/Containers/StaticVectorTest.cpp index baca520fe..e92697fff 100644 --- a/src/UnitTests/Containers/StaticVectorTest.cpp +++ b/src/UnitTests/Containers/StaticVectorTest.cpp @@ -188,11 +188,30 @@ TYPED_TEST( StaticVectorTest, abs ) for( int i = 0; i < size; i++ ) u[ i ] = i; - // TODO: implement unary minus operator - VectorType v = - 1 * u; + VectorType v = -u; EXPECT_EQ( abs( v ), u ); } +//// +// sin +// cos +// tan +// sqrt +// cbrt +// pow +// floor +// ceil +// acos +// asin +// atan +// cosh +// tanh +// log +// log10 +// log2 +// exp +// sign + TYPED_TEST( StaticVectorTest, lpNorm ) { using VectorType = typename TestFixture::VectorType; -- GitLab From 38ce7667c75e952698d068dd7a7e8ee424b331aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 25 Apr 2019 22:52:57 +0200 Subject: [PATCH 14/93] Unary expression templates seem to work. 
--- .../Algorithms/ArrayOperationsCuda.hpp | 8 +- .../Algorithms/CudaMultireductionKernel.h | 4 +- .../Algorithms/CudaReductionKernel.h | 2 +- .../Algorithms/VectorOperationsCuda_impl.h | 6 +- .../ExpressionTemplatesOperations.h | 14 +- .../Expressions/StaticExpressionTemplates.h | 210 +++++++++++-- src/TNL/Containers/StaticVectorExpressions.h | 191 ++++++++++-- .../ExpressionTemplates/CMakeLists.txt | 24 +- src/UnitTests/CMakeLists.txt | 6 +- src/UnitTests/Containers/StaticVectorTest.cpp | 291 ++++++++++++++++-- .../ExpressionTemplatesDynamicTest.cpp | 4 +- 11 files changed, 676 insertions(+), 84 deletions(-) diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp index 739e5149b..8e7c801f3 100644 --- a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp +++ b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp @@ -109,8 +109,8 @@ setMemory( Element* data, #ifdef HAVE_CUDA dim3 blockSize( 0 ), gridSize( 0 ); blockSize. x = 256; - Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); - gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() ); + Index blocksNumber = TNL::ceil( ( double ) size / ( double ) blockSize. x ); + gridSize. x = TNL::min( blocksNumber, Devices::Cuda::getMaxGridSize() ); setArrayValueCudaKernel<<< gridSize, blockSize >>>( data, size, value ); TNL_CHECK_CUDA_DEVICE; #else @@ -161,7 +161,7 @@ copyMemory( DestinationElement* destination, { dim3 blockSize( 0 ), gridSize( 0 ); blockSize. x = 256; - Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); + Index blocksNumber = TNL::ceil( ( double ) size / ( double ) blockSize. x ); gridSize. 
x = min( blocksNumber, Devices::Cuda::getMaxGridSize() ); copyMemoryCudaToCudaKernel<<< gridSize, blockSize >>>( destination, source, size ); TNL_CHECK_CUDA_DEVICE; @@ -282,7 +282,7 @@ copyMemory( DestinationElement* destination, { if( cudaMemcpy( (void*) buffer.get(), (void*) &source[ i ], - min( size - i, Devices::Cuda::getGPUTransferBufferSize() ) * sizeof( SourceElement ), + TNL::min( size - i, Devices::Cuda::getGPUTransferBufferSize() ) * sizeof( SourceElement ), cudaMemcpyDeviceToHost ) != cudaSuccess ) std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; TNL_CHECK_CUDA_DEVICE; diff --git a/src/TNL/Containers/Algorithms/CudaMultireductionKernel.h b/src/TNL/Containers/Algorithms/CudaMultireductionKernel.h index 2074d5941..e8fc7f8bb 100644 --- a/src/TNL/Containers/Algorithms/CudaMultireductionKernel.h +++ b/src/TNL/Containers/Algorithms/CudaMultireductionKernel.h @@ -199,7 +199,7 @@ CudaMultireductionKernelLauncher( Operation& operation, dim3 blockSize, gridSize; // version A: max 16 rows of threads - blockSize.y = min( n, 16 ); + blockSize.y = TNL::min( n, 16 ); // version B: up to 16 rows of threads, then "minimize" number of inactive rows // if( n <= 16 ) @@ -221,7 +221,7 @@ CudaMultireductionKernelLauncher( Operation& operation, while( blockSize.x * blockSize.y > Multireduction_maxThreadsPerBlock ) blockSize.x /= 2; - gridSize.x = min( Devices::Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSizeX ); + gridSize.x = TNL::min( Devices::Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSizeX ); gridSize.y = Devices::Cuda::getNumberOfBlocks( n, blockSize.y ); if( gridSize.y > (unsigned) Devices::Cuda::getMaxGridSize() ) { diff --git a/src/TNL/Containers/Algorithms/CudaReductionKernel.h b/src/TNL/Containers/Algorithms/CudaReductionKernel.h index 59603d0f3..3ef43a055 100644 --- a/src/TNL/Containers/Algorithms/CudaReductionKernel.h +++ b/src/TNL/Containers/Algorithms/CudaReductionKernel.h @@ -264,7 +264,7 @@ struct 
CudaReductionKernelLauncher { dim3 blockSize, gridSize; blockSize.x = Reduction_maxThreadsPerBlock; - gridSize.x = min( Devices::Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize ); + gridSize.x = TNL::min( Devices::Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize ); //// // when there is only one warp per blockSize.x, we need to allocate two warps diff --git a/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h b/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h index d79b5f069..ca7ce908a 100644 --- a/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h +++ b/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h @@ -346,7 +346,7 @@ vectorScalarMultiplication( Vector& v, dim3 blockSize( 0 ), gridSize( 0 ); const Index& size = v.getSize(); blockSize.x = 256; - Index blocksNumber = ceil( ( double ) size / ( double ) blockSize.x ); + Index blocksNumber = TNL::ceil( ( double ) size / ( double ) blockSize.x ); gridSize.x = min( blocksNumber, Devices::Cuda::getMaxGridSize() ); vectorScalarMultiplicationCudaKernel<<< gridSize, blockSize >>>( v.getData(), size, @@ -417,7 +417,7 @@ addVector( Vector1& y, const Index& size = x.getSize(); dim3 cudaBlockSize( 256 ); dim3 cudaBlocks; - cudaBlocks.x = min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ) ); + cudaBlocks.x = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ) ); vectorAddVectorCudaKernel<<< cudaBlocks, cudaBlockSize >>>( y.getData(), x.getData(), @@ -483,7 +483,7 @@ addVectors( Vector1& v, const Index& size = v.getSize(); dim3 cudaBlockSize( 256 ); dim3 cudaBlocks; - cudaBlocks.x = min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ) ); + cudaBlocks.x = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ) ); vectorAddVectorsCudaKernel<<< cudaBlocks, cudaBlockSize >>>( v.getData(), v1.getData(), 
diff --git a/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h b/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h index 331197f23..75fb4dd36 100644 --- a/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h +++ b/src/TNL/Containers/Expressions/ExpressionTemplatesOperations.h @@ -146,9 +146,10 @@ struct Cbrt } }; -template< typename T1, typename Real = T1 > +template< typename T1 > struct Pow { + template< typename Real > __cuda_callable__ static auto evaluate( const T1& a, const Real& exp ) -> decltype( TNL::pow( a, exp ) ) { @@ -206,6 +207,16 @@ struct Atan } }; +template< typename T1 > +struct Sinh +{ + __cuda_callable__ + static auto evaluate( const T1& a ) -> decltype( TNL::sinh( a ) ) + { + return TNL::sinh( a ); + } +}; + template< typename T1 > struct Cosh { @@ -276,7 +287,6 @@ struct Sign } }; - } //namespace Expressions } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h index de5585299..9953233c9 100644 --- a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h @@ -21,6 +21,7 @@ namespace TNL { template< typename T1, template< typename > class Operation, + typename Parameter = void, ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value > struct UnaryExpressionTemplate { @@ -140,12 +141,55 @@ struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVa const T2& op2; }; - //// // Unary expression template +// +// Parameter type serves mainly for pow( base, exp ). Here exp is parameter we need +// to pass to pow. 
+template< typename T1, + template< typename > class Operation, + typename Parameter > +struct UnaryExpressionTemplate< T1, Operation, Parameter, VectorVariable > +{ + using RealType = typename T1::RealType; + using IsExpressionTemplate = bool; + + __cuda_callable__ + UnaryExpressionTemplate( const T1& a, const Parameter& p ) + : operand( a ), parameter( p ) {} + + __cuda_callable__ + static UnaryExpressionTemplate evaluate( const T1& a ) + { + return UnaryExpressionTemplate( a ); + } + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return Operation< typename T1::RealType >::evaluate( operand[ i ], parameter ); + } + + __cuda_callable__ + int getSize() const + { + return operand.getSize(); + } + + void set( const Parameter& p ) { parameter = p; } + + const Parameter& get() { return parameter; } + + protected: + const T1& operand; + Parameter parameter; +}; + +//// +// Unary expression template with no parameter template< typename T1, template< typename > class Operation > -struct UnaryExpressionTemplate< T1, Operation, VectorVariable > +struct UnaryExpressionTemplate< T1, Operation, void, VectorVariable > { using RealType = typename T1::RealType; using IsExpressionTemplate = bool; @@ -172,9 +216,10 @@ struct UnaryExpressionTemplate< T1, Operation, VectorVariable > } protected: - const T1 &operand; + const T1& operand; }; + //// // Binary expressions addition template< typename L1, @@ -1088,7 +1133,7 @@ tan( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { return Expressions::UnaryExpressionTemplate< Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Din >( a ); + Expressions::Tan >( a ); } template< typename L1, @@ -1174,9 +1219,11 @@ const Expressions::UnaryExpressionTemplate< Expressions::Pow > pow( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Real& exp ) { - return Expressions::UnaryExpressionTemplate< + auto e = Expressions::UnaryExpressionTemplate< 
Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Pow >( a, exp ); + Expressions::Pow >( a ); + e.parameter.set( exp ); + return e; } template< typename L1, @@ -1188,9 +1235,11 @@ const Expressions::UnaryExpressionTemplate< Expressions::Pow > pow( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, const Real& exp ) { - return Expressions::UnaryExpressionTemplate< + auto e = Expressions::UnaryExpressionTemplate< Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Pow >( a, exp ); + Expressions::Pow >( a ); + e.parameter.set( exp ); + return e; } //// @@ -1380,7 +1429,7 @@ cosh( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { return Expressions::UnaryExpressionTemplate< Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::cosh >( a ); + Expressions::Cosh >( a ); } template< typename L1, @@ -1396,25 +1445,150 @@ cosh( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) Expressions::Cosh >( a ); } - //// -// tanh -// log -// log10 -// log2 -// exp - - - +// Tanh +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Tanh > +cosh( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Tanh >( a ); +} +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Tanh > +tanh( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Tanh >( a ); +} +//// +// Log 
+template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Log > +log( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Log >( a ); +} +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Log > +log( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Log >( a ); +} +//// +// Log10 +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Log10 > +log10( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Log10 >( a ); +} +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Log10 > +log10( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Log10 >( a ); +} +//// +// Log2 +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, 
L2, LOperation >, + Expressions::Log2 > +log2( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Log2 >( a ); +} +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Log2 > +log2( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Log2 >( a ); +} +//// +// Exp +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Exp > +exp( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Exp >( a ); +} +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Exp > +exp( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::UnaryExpressionTemplate< + Expressions::UnaryExpressionTemplate< L1, LOperation >, + Expressions::Exp >( a ); +} //// // Output stream diff --git a/src/TNL/Containers/StaticVectorExpressions.h b/src/TNL/Containers/StaticVectorExpressions.h index 7680ab9c7..ba290f670 100644 --- a/src/TNL/Containers/StaticVectorExpressions.h +++ b/src/TNL/Containers/StaticVectorExpressions.h @@ -333,30 +333,185 @@ abs( const StaticVector< Size, Real >& a ) return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Abs >( a 
); } +//// +// Sine +template< int Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sin > +sin( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sin >( a ); +} + +//// +// Cosine +template< int Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cos > +cos( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cos >( a ); +} + +//// +// Tangent +template< int Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Tan > +tan( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Tan >( a ); +} + +//// +// Sqrt +template< int Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sqrt > +sqrt( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sqrt >( a ); +} + +//// +// Cbrt +template< int Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cbrt > +cbrt( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cbrt >( a ); +} + +//// +// Power +template< int Size, typename Real, typename ExpType > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Pow, ExpType > +pow( const StaticVector< Size, Real >& a, const ExpType& exp ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Pow, ExpType >( a, exp ); +} + +//// +// Floor +template< int 
Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Floor > +floor( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Floor >( a ); +} + +//// +// Ceil +template< int Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Ceil > +ceil( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Ceil >( a ); +} + +//// +// Acos +template< int Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Acos > +acos( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Acos >( a ); +} + +//// +// Asin +template< int Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Asin > +asin( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Asin >( a ); +} + +//// +// Atan +template< int Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Atan > +atan( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Atan >( a ); +} + +//// +// Cosh +template< int Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cosh > +cosh( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cosh >( a ); +} + +//// +// Tanh +template< int Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< 
StaticVector< Size, Real >, Expressions::Tanh > +tanh( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Tanh >( a ); +} //// -// sin -// cos -// tan -// sqrt -// cbrt -// pow -// floor -// ceil -// acos -// asin -// atan -// cosh -// tanh -// log -// log10 -// log2 -// exp -// sign +// Log +template< int Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log > +log( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log >( a ); +} +//// +// Log10 +template< int Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log10 > +log10( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log10 >( a ); +} +//// +// Log2 +template< int Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log2 > +log2( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log2 >( a ); +} +//// +// Exp +template< int Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Exp > +exp( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Exp >( a ); +} +//// +// Sign +template< int Size, typename Real > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sign > +sign( const StaticVector< Size, Real >& a ) +{ + return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sign >( a ); +} //// diff --git 
a/src/TNL/Experimental/ExpressionTemplates/CMakeLists.txt b/src/TNL/Experimental/ExpressionTemplates/CMakeLists.txt index e146717c6..22cbd28fc 100644 --- a/src/TNL/Experimental/ExpressionTemplates/CMakeLists.txt +++ b/src/TNL/Experimental/ExpressionTemplates/CMakeLists.txt @@ -2,15 +2,15 @@ set( headers StaticVectorExpressions.h VectorExpressions.h ) -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE( tnl-expression-templates expression-templates.cu ) -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE( tnl-expression-templates expression-templates.cpp ) - ADD_EXECUTABLE( tnl-expression-templates-static expression-templates-static.cpp ) -ENDIF( BUILD_CUDA ) - -INSTALL( TARGETS tnl-expression-templates - RUNTIME DESTINATION bin - PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE ) - -INSTALL( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Experimental/ExpressionTemplates ) +#IF( BUILD_CUDA ) +# CUDA_ADD_EXECUTABLE( tnl-expression-templates expression-templates.cu ) +#ELSE( BUILD_CUDA ) +# ADD_EXECUTABLE( tnl-expression-templates expression-templates.cpp ) +# ADD_EXECUTABLE( tnl-expression-templates-static expression-templates-static.cpp ) +#ENDIF( BUILD_CUDA ) +# +#INSTALL( TARGETS tnl-expression-templates +# RUNTIME DESTINATION bin +# PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE ) +# +#INSTALL( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Experimental/ExpressionTemplates ) diff --git a/src/UnitTests/CMakeLists.txt b/src/UnitTests/CMakeLists.txt index 7c2d5582b..b268d8b7b 100644 --- a/src/UnitTests/CMakeLists.txt +++ b/src/UnitTests/CMakeLists.txt @@ -17,9 +17,9 @@ ADD_EXECUTABLE( ExpressionTemplatesStaticTest ExpressionTemplatesStaticTest.cpp TARGET_COMPILE_OPTIONS( ExpressionTemplatesStaticTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( ExpressionTemplatesStaticTest ${GTEST_BOTH_LIBRARIES} ) -ADD_EXECUTABLE( ExpressionTemplatesDynamicTest 
ExpressionTemplatesDynamicTest.cpp ) -TARGET_COMPILE_OPTIONS( ExpressionTemplatesDynamicTest PRIVATE ${CXX_TESTS_FLAGS} ) -TARGET_LINK_LIBRARIES( ExpressionTemplatesDynamicTest ${GTEST_BOTH_LIBRARIES} ) +#ADD_EXECUTABLE( ExpressionTemplatesDynamicTest ExpressionTemplatesDynamicTest.cpp ) +#TARGET_COMPILE_OPTIONS( ExpressionTemplatesDynamicTest PRIVATE ${CXX_TESTS_FLAGS} ) +#TARGET_LINK_LIBRARIES( ExpressionTemplatesDynamicTest ${GTEST_BOTH_LIBRARIES} ) if( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( FileTest FileTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) diff --git a/src/UnitTests/Containers/StaticVectorTest.cpp b/src/UnitTests/Containers/StaticVectorTest.cpp index e92697fff..cf1d3b520 100644 --- a/src/UnitTests/Containers/StaticVectorTest.cpp +++ b/src/UnitTests/Containers/StaticVectorTest.cpp @@ -192,25 +192,278 @@ TYPED_TEST( StaticVectorTest, abs ) EXPECT_EQ( abs( v ), u ); } -//// -// sin -// cos -// tan -// sqrt -// cbrt -// pow -// floor -// ceil -// acos -// asin -// atan -// cosh -// tanh -// log -// log10 -// log2 -// exp -// sign +TYPED_TEST( StaticVectorTest, sin ) +{ + using VectorType = typename TestFixture::VectorType; + constexpr int size = VectorType::size; + + VectorType u, v; + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = sin( u[ i ] ); + } + + EXPECT_EQ( sin( u ), v ); +} + +TYPED_TEST( StaticVectorTest, cos ) +{ + using VectorType = typename TestFixture::VectorType; + constexpr int size = VectorType::size; + + VectorType u, v; + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = cos( u[ i ] ); + } + + EXPECT_EQ( cos( u ), v ); +} + +TYPED_TEST( StaticVectorTest, tan ) +{ + using VectorType = typename TestFixture::VectorType; + constexpr int size = VectorType::size; + + VectorType u, v; + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = tan( u[ i ] ); + } + + EXPECT_EQ( tan( u ), v ); +} + +TYPED_TEST( StaticVectorTest, sqrt ) +{ + using VectorType = typename TestFixture::VectorType; + 
constexpr int size = VectorType::size; + + VectorType u, v; + for( int i = 0; i < size; i++ ) + { + u[ i ] = i; + v[ i ] = sqrt( u[ i ] ); + } + + EXPECT_EQ( sqrt( u ), v ); +} + +TYPED_TEST( StaticVectorTest, cbrt ) +{ + using VectorType = typename TestFixture::VectorType; + constexpr int size = VectorType::size; + + VectorType u, v; + for( int i = 0; i < size; i++ ) + { + u[ i ] = i; + v[ i ] = cbrt( u[ i ] ); + } + + EXPECT_EQ( cbrt( u ), v ); +} + +TYPED_TEST( StaticVectorTest, pow ) +{ + using VectorType = typename TestFixture::VectorType; + using RealType = typename VectorType::RealType; + constexpr int size = VectorType::size; + + VectorType u, v, w; + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = pow( u[ i ], 2.0 ); + w[ i ] = pow( u[ i ], 3.0 ); + } + + EXPECT_EQ( pow( u, 2.0 ), v ); + EXPECT_EQ( pow( u, 3.0 ), w ); +} + +TYPED_TEST( StaticVectorTest, floor ) +{ + using VectorType = typename TestFixture::VectorType; + constexpr int size = VectorType::size; + + VectorType u, v; + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = floor( u[ i ] ); + } + + EXPECT_EQ( floor( u ), v ); +} + +TYPED_TEST( StaticVectorTest, ceil ) +{ + using VectorType = typename TestFixture::VectorType; + constexpr int size = VectorType::size; + + VectorType u, v; + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = ceil( u[ i ] ); + } + + EXPECT_EQ( ceil( u ), v ); +} + +TYPED_TEST( StaticVectorTest, acos ) +{ + using VectorType = typename TestFixture::VectorType; + constexpr int size = VectorType::size; + + VectorType u, v; + for( int i = 0; i < size; i++ ) + { + u[ i ] = ( double )( i - size / 2 ) / ( double ) size; + v[ i ] = acos( u[ i ] ); + } + + EXPECT_EQ( acos( u ), v ); +} + +TYPED_TEST( StaticVectorTest, asin ) +{ + using VectorType = typename TestFixture::VectorType; + constexpr int size = VectorType::size; + + VectorType u, v; + for( int i = 0; i < size; i++ ) + { + u[ i ] = ( double ) ( i - size / 2 
) / ( double ) size; + v[ i ] = asin( u[ i ] ); + } + + EXPECT_EQ( asin( u ), v ); +} + +TYPED_TEST( StaticVectorTest, atan ) +{ + using VectorType = typename TestFixture::VectorType; + constexpr int size = VectorType::size; + + VectorType u, v; + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = atan( u[ i ] ); + } + + EXPECT_EQ( atan( u ), v ); +} + +TYPED_TEST( StaticVectorTest, cosh ) +{ + using VectorType = typename TestFixture::VectorType; + constexpr int size = VectorType::size; + + VectorType u, v; + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = cosh( u[ i ] ); + } + + EXPECT_EQ( cosh( u ), v ); +} + +TYPED_TEST( StaticVectorTest, tanh ) +{ + using VectorType = typename TestFixture::VectorType; + constexpr int size = VectorType::size; + + VectorType u, v; + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = tanh( u[ i ] ); + } + + EXPECT_EQ( tanh( u ), v ); +} + +TYPED_TEST( StaticVectorTest, log ) +{ + using VectorType = typename TestFixture::VectorType; + constexpr int size = VectorType::size; + + VectorType u, v; + for( int i = 0; i < size; i++ ) + { + u[ i ] = i + 1; + v[ i ] = log( u[ i ] ); + } + + EXPECT_EQ( log( u ), v ); +} + +TYPED_TEST( StaticVectorTest, log10 ) +{ + using VectorType = typename TestFixture::VectorType; + constexpr int size = VectorType::size; + + VectorType u, v; + for( int i = 0; i < size; i++ ) + { + u[ i ] = i + 1; + v[ i ] = log10( u[ i ] ); + } + + EXPECT_EQ( log10( u ), v ); +} + +TYPED_TEST( StaticVectorTest, log2 ) +{ + using VectorType = typename TestFixture::VectorType; + constexpr int size = VectorType::size; + + VectorType u, v; + for( int i = 0; i < size; i++ ) + { + u[ i ] = i + 1; + v[ i ] = log2( u[ i ] ); + } + + EXPECT_EQ( log2( u ), v ); +} + +TYPED_TEST( StaticVectorTest, exp ) +{ + using VectorType = typename TestFixture::VectorType; + constexpr int size = VectorType::size; + + VectorType u, v; + for( int i = 0; i < size; i++ ) + { + u[ i ] 
= i - size / 2; + v[ i ] = exp( u[ i ] ); + } + + EXPECT_EQ( exp( u ), v ); +} + +TYPED_TEST( StaticVectorTest, sign ) +{ + using VectorType = typename TestFixture::VectorType; + constexpr int size = VectorType::size; + + VectorType u, v; + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = sign( u[ i ] ); + } + + EXPECT_EQ( sign( u ), v ); +} TYPED_TEST( StaticVectorTest, lpNorm ) { diff --git a/src/UnitTests/ExpressionTemplatesDynamicTest.cpp b/src/UnitTests/ExpressionTemplatesDynamicTest.cpp index 35147591a..5b47ffcac 100644 --- a/src/UnitTests/ExpressionTemplatesDynamicTest.cpp +++ b/src/UnitTests/ExpressionTemplatesDynamicTest.cpp @@ -74,7 +74,7 @@ TEST( ExpressionTemplatesDynamicTest, ExponentialFunction ) Vector< double, Devices::Host, int > dr1( 6 ); VectorView< double, Devices::Host, int > dv1( d1 ); VectorView< double, Devices::Host, int > dvr1( dr1 ); - dvr1.evaluate( exp(dv1) ); + dvr1.evaluate( TNL::exp(dv1) ); double temp; for( int i = 0; i < 6; i++){ temp = std::exp( dv1[ i ] ); @@ -88,7 +88,7 @@ TEST( ExpressionTemplatesDynamicTest, NaturalLogarithm ) Vector< double, Devices::Host, int > dr1( 6 ); VectorView< double, Devices::Host, int > dv1( d1 ); VectorView< double, Devices::Host, int > dvr1( dr1 ); - dvr1.evaluate( log(dv1) ); + dvr1.evaluate( TNL::log(dv1) ); double temp; for( int i = 0; i < 6; i++){ temp = std::log( dv1[ i ] ); -- GitLab From d059699671fbb0d612ff6ea82ffb9faf438004dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 26 Apr 2019 18:58:52 +0200 Subject: [PATCH 15/93] Fixed StaticVectorTest. 
--- .../Containers/Expressions/StaticExpressionTemplates.h | 9 +++++---- src/UnitTests/Containers/StaticVectorTest.cpp | 10 ++++++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h index 9953233c9..09178b780 100644 --- a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h @@ -1599,18 +1599,19 @@ std::ostream& operator << ( std::ostream& str, const BinaryExpressionTemplate< T { str << "[ "; for( int i = 0; i < expression.getSize() - 1; i++ ) - str << expression[ i ] << " "; + str << expression[ i ] << ", "; str << expression[ expression.getSize() - 1 ] << " ]"; return str; } template< typename T, - template< typename > class Operation > -std::ostream& operator << ( std::ostream& str, const UnaryExpressionTemplate< T, Operation >& expression ) + template< typename > class Operation, + typename Parameter > +std::ostream& operator << ( std::ostream& str, const UnaryExpressionTemplate< T, Operation, Parameter >& expression ) { str << "[ "; for( int i = 0; i < expression.getSize() - 1; i++ ) - str << expression[ i ] << " "; + str << expression[ i ] << ", "; str << expression[ expression.getSize() - 1 ] << " ]"; return str; } diff --git a/src/UnitTests/Containers/StaticVectorTest.cpp b/src/UnitTests/Containers/StaticVectorTest.cpp index cf1d3b520..c092a6b97 100644 --- a/src/UnitTests/Containers/StaticVectorTest.cpp +++ b/src/UnitTests/Containers/StaticVectorTest.cpp @@ -372,7 +372,10 @@ TYPED_TEST( StaticVectorTest, cosh ) v[ i ] = cosh( u[ i ] ); } - EXPECT_EQ( cosh( u ), v ); + // EXPECT_EQ( cosh( u ), v ) does not work here for float, maybe because + // of some fast-math optimization + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( cosh( u )[ i ], v[ i ], 1.0e-6 ); } TYPED_TEST( StaticVectorTest, tanh ) @@ -417,7 +420,10 @@ TYPED_TEST( StaticVectorTest, log10 ) v[ 
i ] = log10( u[ i ] ); } - EXPECT_EQ( log10( u ), v ); + // EXPECT_EQ( log10( u ), v ) does not work here for float, maybe because + // of some fast-math optimization + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( log10( u )[ i ], v[ i ], 1.0e-6 ); } TYPED_TEST( StaticVectorTest, log2 ) -- GitLab From 75b0c9da9cbfa8fbcbdfb737994c3d1c5ce6425b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 28 Apr 2019 22:13:01 +0200 Subject: [PATCH 16/93] [WIP] Implementing expression templates for VectorView. --- .../Containers/Algorithms/VectorAssignment.h | 38 +- ...ssionTemplates.h => ExpressionTemplates.h} | 255 +++++++- .../Expressions/ExpressionVariableType.h | 28 +- src/TNL/Containers/StaticVector.h | 2 +- src/TNL/Containers/StaticVector1D_impl.h | 8 +- src/TNL/Containers/StaticVector2D_impl.h | 8 +- src/TNL/Containers/StaticVector3D_impl.h | 8 +- src/TNL/Containers/StaticVectorExpressions.h | 4 +- src/TNL/Containers/StaticVector_impl.h | 4 +- src/TNL/Containers/Vector.h | 11 +- src/TNL/Containers/Vector.hpp | 29 +- src/TNL/Containers/VectorView.h | 18 +- src/TNL/Containers/VectorViewExpressions.h | 546 ++++++++++++++++++ src/TNL/Containers/VectorView_impl.h | 46 +- src/UnitTests/Containers/VectorTest.h | 377 +++++++++++- 15 files changed, 1317 insertions(+), 65 deletions(-) rename src/TNL/Containers/Expressions/{StaticExpressionTemplates.h => ExpressionTemplates.h} (88%) create mode 100644 src/TNL/Containers/VectorViewExpressions.h diff --git a/src/TNL/Containers/Algorithms/VectorAssignment.h b/src/TNL/Containers/Algorithms/VectorAssignment.h index f654b0ee7..0379622b7 100644 --- a/src/TNL/Containers/Algorithms/VectorAssignment.h +++ b/src/TNL/Containers/Algorithms/VectorAssignment.h @@ -54,12 +54,29 @@ struct VectorAssignment< Vector, T, true > v.setSize( t.getSize() ); } - static void assign( Vector& v, const T& t ) + __cuda_callable__ + static void assignStatic( Vector& v, const T& t ) { TNL_ASSERT_EQ( v.getSize(), t.getSize(), "The 
sizes of the vectors must be equal." ); for( decltype( v.getSize() ) i = 0; i < v.getSize(); i ++ ) v[ i ] = t[ i ]; }; + + static void assign( Vector& v, const T& t ) + { + TNL_ASSERT_EQ( v.getSize(), t.getSize(), "The sizes of the vectors must be equal." ); + using RealType = typename Vector::RealType; + using DeviceType = typename Vector::DeviceType; + using IndexType = typename Vector::IndexType; + + RealType* data = v.getData(); + auto ass = [=] __cuda_callable__ ( IndexType i ) + { + data[ i ] = t[ i ]; + }; + ParallelFor< DeviceType >::exec( 0, v.getSize(), ass ); + }; + }; /** @@ -74,12 +91,29 @@ struct VectorAssignment< Vector, T, false > { }; - static void assign( Vector& v, const T& t ) + __cuda_callable__ + static void assignStatic( Vector& v, const T& t ) { TNL_ASSERT_GT( v.getSize(), 0, "Cannot assign value to empty vector." ); for( decltype( v.getSize() ) i = 0; i < v.getSize(); i ++ ) v[ i ] = t; }; + + static void assign( Vector& v, const T& t ) + { + TNL_ASSERT_EQ( v.getSize(), t.getSize(), "The sizes of the vectors must be equal." 
); + using RealType = typename Vector::RealType; + using DeviceType = typename Vector::DeviceType; + using IndexType = typename Vector::IndexType; + + RealType* data = v.getData(); + auto ass = [=] __cuda_callable__ ( IndexType i ) + { + data[ i ] = t; + }; + ParallelFor< DeviceType >::exec( 0, v.getSize(), ass ); + } + }; } // namespace Algorithms diff --git a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h b/src/TNL/Containers/Expressions/ExpressionTemplates.h similarity index 88% rename from src/TNL/Containers/Expressions/StaticExpressionTemplates.h rename to src/TNL/Containers/Expressions/ExpressionTemplates.h index 09178b780..cdc998684 100644 --- a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/ExpressionTemplates.h @@ -1,5 +1,5 @@ /*************************************************************************** - StaticExpressionTemplates.h - description + ExpressionTemplates.h - description ------------------- begin : Apr 18, 2019 copyright : (C) 2019 by Tomas Oberhuber @@ -19,10 +19,17 @@ namespace TNL { namespace Containers { namespace Expressions { +template< typename T > +struct IsStaticType +{ + static constexpr bool value = false; +}; + template< typename T1, template< typename > class Operation, typename Parameter = void, - ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value > + ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value, + bool StaticET = IsStaticType< T1 >::value > struct UnaryExpressionTemplate { }; @@ -31,20 +38,47 @@ template< typename T1, typename T2, template< typename, typename > class Operation, ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value, - ExpressionVariableType T2Type = ExpressionVariableTypeGetter< T2 >::value > + ExpressionVariableType T2Type = ExpressionVariableTypeGetter< T2 >::value, + bool StaticET = IsStaticType< T1 >::value || IsStaticType< T2 >::value > struct BinaryExpressionTemplate { }; 
+template< int Size, + typename Real > +struct IsStaticType< StaticVector< Size, Real > > +{ + static constexpr bool value = true; +}; + +template< typename T1, + template< typename > class Operation, + typename Parameter > +struct IsStaticType< UnaryExpressionTemplate< T1, Operation, Parameter > > +{ + static constexpr bool value = UnaryExpressionTemplate< T1, Operation, Parameter >::isStatic(); +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct IsStaticType< BinaryExpressionTemplate< T1, T2, Operation > > +{ + static constexpr bool value = BinaryExpressionTemplate< T1, T2, Operation >::isStatic(); +}; + + //// -// Binary expression template +// Static binary expression template template< typename T1, typename T2, template< typename, typename > class Operation > -struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariable > +struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariable, true > { using RealType = typename T1::RealType; using IsExpressionTemplate = bool; + static_assert( IsStaticType< T1 >::value == IsStaticType< T2 >::value, "Attempt to mix static and non-static operands in binary expression templates" ); + static constexpr bool isStatic() { return true; } __cuda_callable__ BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} @@ -75,10 +109,11 @@ struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariab template< typename T1, typename T2, template< typename, typename > class Operation > -struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVariable > +struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVariable, true > { using RealType = typename T1::RealType; using IsExpressionTemplate = bool; + static constexpr bool isStatic() { return true; } __cuda_callable__ BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} @@ -110,10 +145,11 @@ 
struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVa template< typename T1, typename T2, template< typename, typename > class Operation > -struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVariable > +struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVariable, true > { using RealType = typename T2::RealType; using IsExpressionTemplate = bool; + static constexpr bool isStatic() { return true; } __cuda_callable__ BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} @@ -142,17 +178,132 @@ struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVa }; //// -// Unary expression template +// Non-static binary expression template +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariable, false > +{ + using RealType = typename T1::RealType; + using DeviceType = typename T1::DeviceType; + using IndexType = typename T1::IndexType; + using IsExpressionTemplate = bool; + + static_assert( std::is_same< typename T1::DeviceType, typename T2::DeviceType >::value, "Attempt to mix operands allocated on different device types." ); + static_assert( IsStaticType< T1 >::value == IsStaticType< T2 >::value, "Attempt to mix static and non-static operands in binary expression templates." 
); + static constexpr bool isStatic() { return false; } + + BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + + static BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) + { + return BinaryExpressionTemplate( a, b ); + } + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return Operation< typename T1::RealType, typename T2::RealType >::evaluate( op1[ i ], op2[ i ] ); + } + + __cuda_callable__ + int getSize() const + { + return op1.getSize(); + } + + protected: + typename OperandType< T1, DeviceType >::type op1; + typename OperandType< T2, DeviceType >::type op2; +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVariable, false > +{ + using RealType = typename T1::RealType; + using DeviceType = typename T1::DeviceType; + using IndexType = typename T1::IndexType; + + using IsExpressionTemplate = bool; + static constexpr bool isStatic() { return false; } + + BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + + BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) + { + return BinaryExpressionTemplate( a, b ); + } + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return Operation< typename T1::RealType, T2 >::evaluate( op1[ i ], op2 ); + } + + __cuda_callable__ + int getSize() const + { + return op1.getSize(); + } + + protected: + typename OperandType< T1, DeviceType >::type op1; + typename OperandType< T2, DeviceType >::type op2; +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVariable, false > +{ + using RealType = typename T2::RealType; + using DeviceType = typename T2::DeviceType; + using IndexType = typename T2::IndexType; + + using IsExpressionTemplate = bool; + static constexpr bool isStatic() { return 
false; } + + BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + + BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) + { + return BinaryExpressionTemplate( a, b ); + } + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return Operation< T1, typename T2::RealType >::evaluate( op1, op2[ i ] ); + } + + __cuda_callable__ + int getSize() const + { + return op2.getSize(); + } + + protected: + const T1 op1; + const T2 op2; + //typename OperandType< T1, DeviceType >::type op1; + //typename OperandType< T2, DeviceType >::type op2; +}; + +//// +// Static unary expression template // // Parameter type serves mainly for pow( base, exp ). Here exp is parameter we need // to pass to pow. template< typename T1, template< typename > class Operation, typename Parameter > -struct UnaryExpressionTemplate< T1, Operation, Parameter, VectorVariable > +struct UnaryExpressionTemplate< T1, Operation, Parameter, VectorVariable, true > { using RealType = typename T1::RealType; using IsExpressionTemplate = bool; + static constexpr bool isStatic() { return true; } __cuda_callable__ UnaryExpressionTemplate( const T1& a, const Parameter& p ) @@ -186,13 +337,14 @@ struct UnaryExpressionTemplate< T1, Operation, Parameter, VectorVariable > }; //// -// Unary expression template with no parameter +// Static unary expression template with no parameter template< typename T1, template< typename > class Operation > -struct UnaryExpressionTemplate< T1, Operation, void, VectorVariable > +struct UnaryExpressionTemplate< T1, Operation, void, VectorVariable, true > { using RealType = typename T1::RealType; using IsExpressionTemplate = bool; + static constexpr bool isStatic() { return true; } __cuda_callable__ UnaryExpressionTemplate( const T1& a ): operand( a ){} @@ -219,6 +371,87 @@ struct UnaryExpressionTemplate< T1, Operation, void, VectorVariable > const T1& operand; }; +//// +// Non-static unary expression template +// +// Parameter type serves 
mainly for pow( base, exp ). Here exp is parameter we need +// to pass to pow. +template< typename T1, + template< typename > class Operation, + typename Parameter > +struct UnaryExpressionTemplate< T1, Operation, Parameter, VectorVariable, false > +{ + using RealType = typename T1::RealType; + using DeviceType = typename T1::DeviceType; + using IndexType = typename T1::IndexType; + using IsExpressionTemplate = bool; + static constexpr bool isStatic() { return false; } + + UnaryExpressionTemplate( const T1& a, const Parameter& p ) + : operand( a ), parameter( p ) {} + + static UnaryExpressionTemplate evaluate( const T1& a ) + { + return UnaryExpressionTemplate( a ); + } + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return Operation< typename T1::RealType >::evaluate( operand[ i ], parameter ); + } + + __cuda_callable__ + int getSize() const + { + return operand.getSize(); + } + + void set( const Parameter& p ) { parameter = p; } + + const Parameter& get() { return parameter; } + + protected: + const T1 operand; + //typename OperandType< T1, DeviceType >::type operand; + Parameter parameter; +}; + +//// +// Non-static unary expression template with no parameter +template< typename T1, + template< typename > class Operation > +struct UnaryExpressionTemplate< T1, Operation, void, VectorVariable, false > +{ + using RealType = typename T1::RealType; + using DeviceType = typename T1::DeviceType; + using IndexType = typename T1::IndexType; + using IsExpressionTemplate = bool; + static constexpr bool isStatic() { return false; } + + UnaryExpressionTemplate( const T1& a ): operand( a ){} + + static UnaryExpressionTemplate evaluate( const T1& a ) + { + return UnaryExpressionTemplate( a ); + } + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return Operation< typename T1::RealType >::evaluate( operand[ i ] ); + } + + __cuda_callable__ + int getSize() const + { + return operand.getSize(); + } + + protected: + const T1 operand; // 
TODO: fix + //typename std::add_const< typename OperandType< T1, DeviceType >::type >::type operand; +}; //// // Binary expressions addition diff --git a/src/TNL/Containers/Expressions/ExpressionVariableType.h b/src/TNL/Containers/Expressions/ExpressionVariableType.h index bc3d2b5f2..a37cfaa57 100644 --- a/src/TNL/Containers/Expressions/ExpressionVariableType.h +++ b/src/TNL/Containers/Expressions/ExpressionVariableType.h @@ -18,6 +18,9 @@ namespace TNL { template< int Size, typename Real > class StaticVector; +template< typename Real, typename Device, typename Index > +class VectorView; + namespace Expressions { enum ExpressionVariableType { ArithmeticVariable, VectorVariable, OtherVariable }; @@ -40,7 +43,6 @@ public: static constexpr bool value = ( sizeof( test< typename std::remove_reference< T >::type >(0) ) == sizeof( YesType ) ); }; - template< typename T > struct IsVectorType { @@ -54,6 +56,14 @@ struct IsVectorType< StaticVector< Size, Real > > static constexpr bool value = true; }; +template< typename Real, + typename Device, + typename Index > +struct IsVectorType< VectorView< Real, Device, Index > > +{ + static constexpr bool value = true; +}; + template< typename T, bool IsArithmetic = std::is_arithmetic< T >::value, bool IsVector = IsVectorType< T >::value || IsExpressionTemplate< T >::value > @@ -74,6 +84,22 @@ struct ExpressionVariableTypeGetter< T, false, true > static constexpr ExpressionVariableType value = VectorVariable; }; +//// +// Non-static expression templates might be passed on GPU, for example. In this +// case, we cannot store ET operands using references but we need to make copies. 
+template< typename T, + typename Device > +struct OperandType +{ + using type = typename std::add_const< typename std::remove_reference< T >::type >::type; +}; + +template< typename T > +struct OperandType< T, Devices::Host > +{ + using type = typename std::add_const< typename std::add_lvalue_reference< T >::type >::type; +}; + } //namespace Expressions } //namespace Containers } //namespace TNL diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h index 1332dc2a8..00c3b8022 100644 --- a/src/TNL/Containers/StaticVector.h +++ b/src/TNL/Containers/StaticVector.h @@ -12,7 +12,7 @@ #include #include -#include +#include namespace TNL { namespace Containers { diff --git a/src/TNL/Containers/StaticVector1D_impl.h b/src/TNL/Containers/StaticVector1D_impl.h index 436605285..e6e7ae4e7 100644 --- a/src/TNL/Containers/StaticVector1D_impl.h +++ b/src/TNL/Containers/StaticVector1D_impl.h @@ -58,7 +58,8 @@ template< typename Real > __cuda_callable__ StaticVector< 1, Real >::StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ) { - Algorithms::VectorAssignment< StaticVector< 1, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assign( *this, op ); + static_assert( Expressions::BinaryExpressionTemplate< T1, T2, Operation >::isStatic(), "Attempt to assign non-static expression to static vector." 
); + Algorithms::VectorAssignment< StaticVector< 1, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, op ); }; template< typename Real > @@ -67,7 +68,8 @@ template< typename Real > __cuda_callable__ StaticVector< 1, Real >::StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ) { - Algorithms::VectorAssignment< StaticVector< 1, Real >, Expressions::UnaryExpressionTemplate< T, Operation > >::assign( *this, op ); + static_assert( Expressions::UnaryExpressionTemplate< T, Operation >::isStatic(), "Attempt to assign non-static expression to static vector." ); + Algorithms::VectorAssignment< StaticVector< 1, Real >, Expressions::UnaryExpressionTemplate< T, Operation > >::assignStatic( *this, op ); }; template< typename Real > @@ -94,7 +96,7 @@ template< typename Real > StaticVector< 1, Real >& StaticVector< 1, Real >::operator =( const RHS& rhs ) { - Algorithms::VectorAssignment< StaticVector< 1, Real >, RHS >::assign( *this, rhs ); + Algorithms::VectorAssignment< StaticVector< 1, Real >, RHS >::assignStatic( *this, rhs ); return *this; } diff --git a/src/TNL/Containers/StaticVector2D_impl.h b/src/TNL/Containers/StaticVector2D_impl.h index 75e74d675..3b0f28951 100644 --- a/src/TNL/Containers/StaticVector2D_impl.h +++ b/src/TNL/Containers/StaticVector2D_impl.h @@ -64,7 +64,8 @@ template< typename Real > __cuda_callable__ StaticVector< 2, Real >::StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ) { - Algorithms::VectorAssignment< StaticVector< 2, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assign( *this, op ); + static_assert( Expressions::BinaryExpressionTemplate< T1, T2, Operation >::isStatic(), "Attempt to assign non-static expression to static vector." 
); + Algorithms::VectorAssignment< StaticVector< 2, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, op ); }; template< typename Real > @@ -73,7 +74,8 @@ template< typename Real > __cuda_callable__ StaticVector< 2, Real >::StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ) { - Algorithms::VectorAssignment< StaticVector< 2, Real >, Expressions::UnaryExpressionTemplate< T, Operation > >::assign( *this, op ); + static_assert( Expressions::UnaryExpressionTemplate< T, Operation >::isStatic(), "Attempt to assign non-static expression to static vector." ); + Algorithms::VectorAssignment< StaticVector< 2, Real >, Expressions::UnaryExpressionTemplate< T, Operation > >::assignStatic( *this, op ); }; template< typename Real > @@ -101,7 +103,7 @@ template< typename Real > StaticVector< 2, Real >& StaticVector< 2, Real >::operator =( const RHS& rhs ) { - Algorithms::VectorAssignment< StaticVector< 2, Real >, RHS >::assign( *this, rhs ); + Algorithms::VectorAssignment< StaticVector< 2, Real >, RHS >::assignStatic( *this, rhs ); return *this; } diff --git a/src/TNL/Containers/StaticVector3D_impl.h b/src/TNL/Containers/StaticVector3D_impl.h index 1626dce7f..8475fb035 100644 --- a/src/TNL/Containers/StaticVector3D_impl.h +++ b/src/TNL/Containers/StaticVector3D_impl.h @@ -64,7 +64,8 @@ template< typename Real > __cuda_callable__ StaticVector< 3, Real >::StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ) { - Algorithms::VectorAssignment< StaticVector< 3, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assign( *this, op ); + static_assert( Expressions::BinaryExpressionTemplate< T1, T2, Operation >::isStatic(), "Attempt to assign non-static expression to static vector." 
); + Algorithms::VectorAssignment< StaticVector< 3, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, op ); }; template< typename Real > @@ -73,7 +74,8 @@ template< typename Real > __cuda_callable__ StaticVector< 3, Real >::StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ) { - Algorithms::VectorAssignment< StaticVector< 3, Real >, Expressions::UnaryExpressionTemplate< T, Operation > >::assign( *this, op ); + static_assert( Expressions::UnaryExpressionTemplate< T, Operation >::isStatic(), "Attempt to assign non-static expression to static vector." ); + Algorithms::VectorAssignment< StaticVector< 3, Real >, Expressions::UnaryExpressionTemplate< T, Operation > >::assignStatic( *this, op ); }; @@ -103,7 +105,7 @@ template< typename Real > StaticVector< 3, Real >& StaticVector< 3, Real >::operator =( const RHS& rhs ) { - Algorithms::VectorAssignment< StaticVector< 3, Real >, RHS >::assign( *this, rhs ); + Algorithms::VectorAssignment< StaticVector< 3, Real >, RHS >::assignStatic( *this, rhs ); return *this; } diff --git a/src/TNL/Containers/StaticVectorExpressions.h b/src/TNL/Containers/StaticVectorExpressions.h index ba290f670..4d7eefd94 100644 --- a/src/TNL/Containers/StaticVectorExpressions.h +++ b/src/TNL/Containers/StaticVectorExpressions.h @@ -10,7 +10,7 @@ #pragma once -#include +#include #include #include @@ -318,7 +318,7 @@ bool operator>=( const StaticVector< Size, Real1 >& a, const StaticVector< Size, template< int Size, typename Real > __cuda_callable__ const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Minus > -operator -( const StaticVector< Size, Real >& a ) +operator-( const StaticVector< Size, Real >& a ) { return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Minus >( a ); } diff --git a/src/TNL/Containers/StaticVector_impl.h b/src/TNL/Containers/StaticVector_impl.h index 6437e9dc6..4f92c56aa 100644 --- 
a/src/TNL/Containers/StaticVector_impl.h +++ b/src/TNL/Containers/StaticVector_impl.h @@ -57,6 +57,7 @@ template< int Size, typename Real > template< typename, typename > class Operation > StaticVector< Size, Real >::StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ) { + static_assert( Expressions::BinaryExpressionTemplate< T1, T2, Operation >::isStatic(), "Attempt to assign non-static expression to static vector." ); Algorithms::VectorAssignment< StaticVector< Size, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assign( *this, op ); }; @@ -67,6 +68,7 @@ template< int Size, __cuda_callable__ StaticVector< Size, Real >::StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ) { + static_assert( Expressions::UnaryExpressionTemplate< T, Operation >::isStatic(), "Attempt to assign non-static expression to static vector." ); Algorithms::VectorAssignment< StaticVector< Size, Real >, Expressions::UnaryExpressionTemplate< T, Operation > >::assign( *this, op ); }; @@ -96,7 +98,7 @@ template< int Size, typename Real > StaticVector< Size, Real >& StaticVector< Size, Real >::operator =( const RHS& rhs ) { - Algorithms::VectorAssignment< StaticVector< Size, Real >, RHS >::assign( *this, rhs ); + Algorithms::VectorAssignment< StaticVector< Size, Real >, RHS >::assignStatic( *this, rhs ); return *this; } diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h index 30f959452..2c3a88cd4 100644 --- a/src/TNL/Containers/Vector.h +++ b/src/TNL/Containers/Vector.h @@ -41,8 +41,8 @@ public: using ViewType = VectorView< Real, Device, Index >; using ConstViewType = VectorView< std::add_const_t< Real >, Device, Index >; - /** Assignment operators are inherited from the class \ref Array. */ - using Array< Real, Device, Index >::operator=; + /** Subscript operator is inherited from the class \ref Array. */ + using Array< Real, Device, Index >::operator[]; /** * \brief Basic constructor. 
@@ -168,8 +168,13 @@ public: void addElement( const IndexType i, const RealType& value, const Scalar thisElementMultiplicator ); + + __cuda_callable__ Real& operator[]( const Index& i ); - Vector& operator=( const Vector& v ); + __cuda_callable__ const Real& operator[]( const Index& i ) const; + + template< typename VectorExpression > + Vector& operator = ( const VectorExpression& expression ); /** * \brief This function subtracts \e vector from this vector and returns the resulting vector. diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index f39d537c1..cad3b758d 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -189,13 +189,36 @@ addElement( const IndexType i, template< typename Real, typename Device, typename Index > +__cuda_callable__ +inline Real& +Vector< Real, Device, Index >:: +operator[]( const Index& i ) +{ + return this->operator[]( i ); +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +inline const Real& +Vector< Real, Device, Index >:: +operator[]( const Index& i ) const +{ + return this->operator[]( i ); +} + +template< typename Real, + typename Device, + typename Index > + template< typename VectorExpression > Vector< Real, Device, Index >& -Vector< Real, Device, Index >::operator=( const Vector< Real, Device, Index >& v ) +Vector< Real, Device, Index >::operator = ( const VectorExpression& expression ) { - Array< Real, Device, Index >::operator = ( v ); - return *this; + Algorithms::VectorAssignment< Vector< Real, Device, Index >, VectorExpression >::assign( *this, expression ); } + template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index e6294f242..5d3197494 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -13,6 +13,7 @@ #pragma once #include +#include namespace TNL { namespace Containers { @@ -43,8 +44,8 @@ public: using 
ArrayView< Real, Device, Index >::ArrayView; #endif - // inherit all ArrayView's assignment operators - using BaseType::operator=; + /** Subscript operator is inherited from the class \ref ArrayView. */ + using ArrayView< Real, Device, Index >::operator[]; // In C++14, default constructors cannot be inherited, although Clang // and GCC since version 7.0 inherit them. @@ -58,6 +59,16 @@ public: VectorView( const ArrayView< Real_, Device, Index >& view ) : BaseType::ArrayView( view ) {} + template< typename T1, + typename T2, + template< typename, typename > class Operation > + VectorView( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& expression ); + + template< typename T, + template< typename > class Operation > + VectorView( const Expressions::UnaryExpressionTemplate< T, Operation >& expression ); + + /** * \brief Returns a modifiable view of the array view. */ @@ -87,6 +98,9 @@ public: RealType value, Scalar thisElementMultiplicator ); + template< typename VectorExpression > + VectorView& operator = ( const VectorExpression& expression ); + template< typename Vector > VectorView& operator-=( const Vector& vector ); diff --git a/src/TNL/Containers/VectorViewExpressions.h b/src/TNL/Containers/VectorViewExpressions.h new file mode 100644 index 000000000..c44b72712 --- /dev/null +++ b/src/TNL/Containers/VectorViewExpressions.h @@ -0,0 +1,546 @@ +/*************************************************************************** + VectorViewExpressions.h - description + ------------------- + begin : Apr 27, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include + +namespace TNL { + namespace Containers { + +//// +// Addition +template< typename Real, typename Device, typename Index, typename ET > +__cuda_callable__ +const 
Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Addition > +operator+( const VectorView< Real, Device, Index >& a, const ET& b ) +{ + return Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Addition >( a, b ); +} + +template< typename ET, typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Addition > +operator+( const ET& a, const VectorView< Real, Device, Index >& b ) +{ + return Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Addition >( a, b ); +} + +template< typename Real1, typename Real2, typename Device, typename Index > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Addition > +operator+( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +{ + return Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Addition >( a, b ); +} + +//// +// Subtraction +template< typename Real, typename Device, typename Index, typename ET > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Subtraction > +operator-( const VectorView< Real, Device, Index >& a, const ET& b ) +{ + return Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Subtraction >( a, b ); +} + +template< typename ET, typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Subtraction > +operator-( const ET& a, const VectorView< Real, Device, Index >& b ) +{ + return Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, 
Expressions::Subtraction >( a, b ); +} + +template< typename Real1, typename Real2, typename Device, typename Index > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Subtraction > +operator-( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +{ + return Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Subtraction >( a, b ); +} + +//// +// Multiplication +template< typename Real, typename Device, typename Index, typename ET > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Multiplication > +operator*( const VectorView< Real, Device, Index >& a, const ET& b ) +{ + return Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Multiplication >( a, b ); +} + +template< typename ET, typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Multiplication > +operator*( const ET& a, const VectorView< Real, Device, Index >& b ) +{ + return Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Multiplication >( a, b ); +} + +template< typename Real1, typename Real2, typename Device, typename Index > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Multiplication > +operator*( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +{ + return Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Multiplication >( a, b ); +} + +//// +// Division +template< typename Real, typename Device, typename Index, typename ET 
> +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Division > +operator/( const VectorView< Real, Device, Index >& a, const ET& b ) +{ + return Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Division >( a, b ); +} + +template< typename ET, typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Division > +operator/( const ET& a, const VectorView< Real, Device, Index >& b ) +{ + return Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Division >( a, b ); +} + +template< typename Real1, typename Real2, typename Device, typename Index > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Division > +operator/( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +{ + return Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Division >( a, b ); +} + +//// +// Min +template< typename Real, typename Device, typename Index, typename ET > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Min > +min( const VectorView< Real, Device, Index >& a, const ET& b ) +{ + return Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Min >( a, b ); +} + +template< typename ET, typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Min > +min( const ET& a, const VectorView< Real, Device, Index >& b ) +{ + return Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Min >( a, 
b ); +} + +template< typename Real1, typename Real2, typename Device, typename Index > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Min > +min( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +{ + return Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Min >( a, b ); +} + +//// +// Max +template< typename Real, typename Device, typename Index, typename ET > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Max > +max( const VectorView< Real, Device, Index >& a, const ET& b ) +{ + return Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Max >( a, b ); +} + +template< typename ET, typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Max > +max( const ET& a, const VectorView< Real, Device, Index >& b ) +{ + return Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Max >( a, b ); +} + +template< typename Real1, typename Real2, typename Device, typename Index > +__cuda_callable__ +const Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Max > +max( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +{ + return Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Max >( a, b ); +} + +//// +// Comparison operations - operator == +template< typename Real, typename Device, typename Index, typename ET > +__cuda_callable__ +bool operator==( const VectorView< Real, Device, Index >& a, const ET& b ) +{ + return 
Expressions::StaticComparisonEQ( a, b ); +} + +template< typename ET, typename Real, typename Device, typename Index > +__cuda_callable__ +bool operator==( const ET& a, const VectorView< Real, Device, Index >& b ) +{ + return Expressions::StaticComparisonEQ( a, b ); +} + +template< typename Real1, typename Real2, typename Device, typename Index > +__cuda_callable__ +bool operator==( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +{ + return Expressions::StaticComparisonEQ( a, b ); +} + +//// +// Comparison operations - operator != +template< typename Real, typename Device, typename Index, typename ET > +__cuda_callable__ +bool operator!=( const VectorView< Real, Device, Index >& a, const ET& b ) +{ + return Expressions::StaticComparisonNE( a, b ); +} + +template< typename ET, typename Real, typename Device, typename Index > +__cuda_callable__ +bool operator!=( const ET& a, const VectorView< Real, Device, Index >& b ) +{ + return Expressions::StaticComparisonNE( a, b ); +} + +template< typename Real1, typename Real2, typename Device, typename Index > +__cuda_callable__ +bool operator!=( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +{ + return Expressions::StaticComparisonNE( a, b ); +} + +//// +// Comparison operations - operator < +template< typename Real, typename Device, typename Index, typename ET > +__cuda_callable__ +bool operator<( const VectorView< Real, Device, Index >& a, const ET& b ) +{ + return Expressions::StaticComparisonLT( a, b ); +} + +template< typename ET, typename Real, typename Device, typename Index > +__cuda_callable__ +bool operator<( const ET& a, const VectorView< Real, Device, Index >& b ) +{ + return Expressions::StaticComparisonLT( a, b ); +} + +template< typename Real1, typename Real2, typename Device, typename Index > +__cuda_callable__ +bool operator<( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +{ + 
return Expressions::StaticComparisonLT( a, b ); +} + +//// +// Comparison operations - operator <= +template< typename Real, typename Device, typename Index, typename ET > +__cuda_callable__ +bool operator<=( const VectorView< Real, Device, Index >& a, const ET& b ) +{ + return Expressions::StaticComparisonLE( a, b ); +} + +template< typename ET, typename Real, typename Device, typename Index > +__cuda_callable__ +bool operator<=( const ET& a, const VectorView< Real, Device, Index >& b ) +{ + return Expressions::StaticComparisonLE( a, b ); +} + +template< typename Real1, typename Real2, typename Device, typename Index > +__cuda_callable__ +bool operator<=( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +{ + return Expressions::StaticComparisonLE( a, b ); +} + +//// +// Comparison operations - operator > +template< typename Real, typename Device, typename Index, typename ET > +__cuda_callable__ +bool operator>( const VectorView< Real, Device, Index >& a, const ET& b ) +{ + return Expressions::StaticComparisonGT( a, b ); +} + +template< typename ET, typename Real, typename Device, typename Index > +__cuda_callable__ +bool operator>( const ET& a, const VectorView< Real, Device, Index >& b ) +{ + return Expressions::StaticComparisonGT( a, b ); +} + +template< typename Real1, typename Real2, typename Device, typename Index > +__cuda_callable__ +bool operator>( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +{ + return Expressions::StaticComparisonGT( a, b ); +} + +//// +// Comparison operations - operator >= +template< typename Real, typename Device, typename Index, typename ET > +__cuda_callable__ +bool operator>=( const VectorView< Real, Device, Index >& a, const ET& b ) +{ + return Expressions::StaticComparisonGE( a, b ); +} + +template< typename ET, typename Real, typename Device, typename Index > +__cuda_callable__ +bool operator>=( const ET& a, const VectorView< Real, Device, 
Index >& b ) +{ + return Expressions::StaticComparisonGE( a, b ); +} + +template< typename Real1, typename Real2, typename Device, typename Index > +__cuda_callable__ +bool operator>=( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +{ + return Expressions::StaticComparisonGE( a, b ); +} + +//// +// Minus +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Minus > +operator-( const VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Minus >( a ); +} + +//// +// Abs +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Abs > +abs( const VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Abs >( a ); +} + +//// +// Sine +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Sin > +sin( const VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Sin >( a ); +} + +//// +// Cosine +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Cos > +cos( const VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Cos >( a ); +} + +//// +// Tangent +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Tan > +tan( const 
VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Tan >( a ); +} + +//// +// Sqrt +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Sqrt > +sqrt( const VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Sqrt >( a ); +} + +//// +// Cbrt +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Cbrt > +cbrt( const VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Cbrt >( a ); +} + +//// +// Power +template< typename Real, typename Device, typename Index, typename ExpType > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Pow, ExpType > +pow( const VectorView< Real, Device, Index >& a, const ExpType& exp ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Pow, ExpType >( a, exp ); +} + +//// +// Floor +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Floor > +floor( const VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Floor >( a ); +} + +//// +// Ceil +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Ceil > +ceil( const VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, 
Expressions::Ceil >( a ); +} + +//// +// Acos +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Acos > +acos( const VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Acos >( a ); +} + +//// +// Asin +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Asin > +asin( const VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Asin >( a ); +} + +//// +// Atan +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Atan > +atan( const VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Atan >( a ); +} + +//// +// Cosh +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Cosh > +cosh( const VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Cosh >( a ); +} + +//// +// Tanh +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Tanh > +tanh( const VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Tanh >( a ); +} + +//// +// Log +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, 
Device, Index >, Expressions::Log > +log( const VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Log >( a ); +} + +//// +// Log10 +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Log10 > +log10( const VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Log10 >( a ); +} + +//// +// Log2 +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Log2 > +log2( const VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Log2 >( a ); +} + +//// +// Exp +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Exp > +exp( const VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Exp >( a ); +} + +//// +// Sign +template< typename Real, typename Device, typename Index > +__cuda_callable__ +const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Sign > +sign( const VectorView< Real, Device, Index >& a ) +{ + return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Sign >( a ); +} + + +//// +// TODO: Replace this with multiplication when its safe +template< typename Real, typename Device, typename Index, typename ET > +__cuda_callable__ +VectorView< Real, Device, Index > +Scale( const VectorView< Real, Device, Index >& a, const ET& b ) +{ + VectorView< Real, Device, Index > result = Expressions::BinaryExpressionTemplate< 
VectorView< Real, Device, Index >, ET, Expressions::Multiplication >( a, b ); + return result; +} + +template< typename ET, typename Real, typename Device, typename Index > +__cuda_callable__ +Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Multiplication > +Scale( const ET& a, const VectorView< Real, Device, Index >& b ) +{ + VectorView< Real, Device, Index > result = Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Multiplication >( a, b ); + return result; +} + +template< typename Real1, typename Real2, typename Device, typename Index > +__cuda_callable__ +Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Multiplication > +Scale( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +{ + VectorView< Real1, Device, Index > result = Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Multiplication >( a, b ); + return result; +} + + + } //namespace Containers +} // namespace TNL diff --git a/src/TNL/Containers/VectorView_impl.h b/src/TNL/Containers/VectorView_impl.h index 02b95397d..0a4c5b590 100644 --- a/src/TNL/Containers/VectorView_impl.h +++ b/src/TNL/Containers/VectorView_impl.h @@ -12,25 +12,49 @@ #include #include +#include +#include namespace TNL { namespace Containers { -template< typename Value, +template< typename Real, + typename Device, + typename Index > + template< typename T1, + typename T2, + template< typename, typename > class Operation > +VectorView< Real, Device, Index >::VectorView( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& expression ) +{ + Algorithms::VectorAssignment< VectorView< Real, Device, Index >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assign( *this, expression ); +}; + +template< typename Real, + typename Device, + typename 
Index > + template< typename T, + template< typename > class Operation > +__cuda_callable__ +VectorView< Real, Device, Index >::VectorView( const Expressions::UnaryExpressionTemplate< T, Operation >& expression ) +{ + Algorithms::VectorAssignment< VectorView< Real, Device, Index >, Expressions::UnaryExpressionTemplate< T, Operation > >::assign( *this, expression ); +}; + +template< typename Real, typename Device, typename Index > -typename VectorView< Value, Device, Index >::ViewType -VectorView< Value, Device, Index >:: +typename VectorView< Real, Device, Index >::ViewType +VectorView< Real, Device, Index >:: getView() { return *this; } -template< typename Value, +template< typename Real, typename Device, typename Index > -typename VectorView< Value, Device, Index >::ConstViewType -VectorView< Value, Device, Index >:: +typename VectorView< Real, Device, Index >::ConstViewType +VectorView< Real, Device, Index >:: getConstView() const { return *this; @@ -126,6 +150,16 @@ addElement( IndexType i, RealType value, Scalar thisElementMultiplicator ) Algorithms::VectorOperations< Device >::addElement( *this, i, value, thisElementMultiplicator ); } +template< typename Real, + typename Device, + typename Index > + template< typename VectorExpression > +VectorView< Real, Device, Index >& +VectorView< Real, Device, Index >::operator = ( const VectorExpression& expression ) +{ + Algorithms::VectorAssignment< VectorView< Real, Device, Index >, VectorExpression >::assign( *this, expression ); +} + template< typename Real, typename Device, typename Index > diff --git a/src/UnitTests/Containers/VectorTest.h b/src/UnitTests/Containers/VectorTest.h index 0e704f12e..4bc4d9229 100644 --- a/src/UnitTests/Containers/VectorTest.h +++ b/src/UnitTests/Containers/VectorTest.h @@ -148,8 +148,9 @@ TYPED_TEST_SUITE( VectorTest, VectorTypes ); TYPED_TEST( VectorTest, constructors ) { using VectorType = typename TestFixture::VectorType; + const int size = VECTOR_TEST_SIZE; - VectorType u; + 
VectorType u( size ); EXPECT_EQ( u.getSize(), 0 ); VectorType v( 10 ); @@ -335,9 +336,7 @@ TYPED_TEST( VectorTest, differenceMax ) using ViewType = typename TestFixture::ViewType; const int size = VECTOR_TEST_SIZE; - VectorType u, v; - u.setSize( size ); - v.setSize( size ); + VectorType u( size ), v( size ); ViewType u_view( u ), v_view( v ); setLinearSequence( u ); setConstantSequence( v, size / 2 ); @@ -354,9 +353,7 @@ TYPED_TEST( VectorTest, differenceMin ) using ViewType = typename TestFixture::ViewType; const int size = VECTOR_TEST_SIZE; - VectorType u, v; - u.setSize( size ); - v.setSize( size ); + VectorType u( size ), v( size ); ViewType u_view( u ), v_view( v ); setLinearSequence( u ); setConstantSequence( v, size / 2 ); @@ -377,9 +374,7 @@ TYPED_TEST( VectorTest, differenceAbsMax ) // this test expects an odd size const int size = VECTOR_TEST_SIZE % 2 ? VECTOR_TEST_SIZE : VECTOR_TEST_SIZE - 1; - VectorType u, v; - u.setSize( size ); - v.setSize( size ); + VectorType u( size ), v( size ); ViewType u_view( u ), v_view( v ); setNegativeLinearSequence( u ); setConstantSequence( v, - size / 2 ); @@ -396,9 +391,7 @@ TYPED_TEST( VectorTest, differenceAbsMin ) using ViewType = typename TestFixture::ViewType; const int size = VECTOR_TEST_SIZE; - VectorType u, v; - u.setSize( size ); - v.setSize( size ); + VectorType u( size ), v( size ); ViewType u_view( u ), v_view( v ); setNegativeLinearSequence( u ); setConstantSequence( v, - size / 2 ); @@ -420,9 +413,7 @@ TYPED_TEST( VectorTest, differenceLpNorm ) const int size = VECTOR_TEST_SIZE; const RealType epsilon = 64 * std::numeric_limits< RealType >::epsilon(); - VectorType u, v; - u.setSize( size ); - v.setSize( size ); + VectorType u( size ), v( size ); ViewType u_view( u ), v_view( v ); u.setValue( 3.0 ); v.setValue( 1.0 ); @@ -449,9 +440,7 @@ TYPED_TEST( VectorTest, differenceSum ) // this test expect an even size const int size = VECTOR_TEST_SIZE % 2 ? 
VECTOR_TEST_SIZE - 1 : VECTOR_TEST_SIZE; - VectorType u, v; - u.setSize( size ); - v.setSize( size ); + VectorType u( size ), v( size ); ViewType u_view( u ), v_view( v ); v.setValue( 1.0 ); @@ -483,8 +472,7 @@ TYPED_TEST( VectorTest, scalarMultiplication ) using ViewType = typename TestFixture::ViewType; const int size = VECTOR_TEST_SIZE; - VectorType u; - u.setSize( size ); + VectorType u( size ); ViewType u_view( u ); typename VectorType::HostType expected; @@ -521,9 +509,7 @@ TYPED_TEST( VectorTest, scalarProduct ) // this test expects an odd size const int size = VECTOR_TEST_SIZE % 2 ? VECTOR_TEST_SIZE : VECTOR_TEST_SIZE - 1; - VectorType u, v; - u.setSize( size ); - v.setSize( size ); + VectorType u( size ), v( size ); ViewType u_view( u ), v_view( v ); setOscilatingSequence( u, 1.0 ); setConstantSequence( v, 1 ); @@ -708,6 +694,336 @@ TYPED_TEST( VectorTest, exclusivePrefixSum ) EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i - 1 ); } +TYPED_TEST( VectorTest, abs ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + u[ i ] = i; + + v = -u; + EXPECT_EQ( abs( v ), u ); +} + +TYPED_TEST( VectorTest, sin ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = sin( u[ i ] ); + } + + EXPECT_EQ( sin( u ), v ); +} + +TYPED_TEST( VectorTest, cos ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = cos( 
u[ i ] ); + } + + EXPECT_EQ( cos( u ), v ); +} + +TYPED_TEST( VectorTest, tan ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = tan( u[ i ] ); + } + + EXPECT_EQ( tan( u ), v ); +} + +TYPED_TEST( VectorTest, sqrt ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u[ i ] = i; + v[ i ] = sqrt( u[ i ] ); + } + + EXPECT_EQ( sqrt( u ), v ); +} + +TYPED_TEST( VectorTest, cbrt ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u[ i ] = i; + v[ i ] = cbrt( u[ i ] ); + } + + EXPECT_EQ( cbrt( u ), v ); +} + +TYPED_TEST( VectorTest, pow ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ), _w( size ); + ViewType u( _u ), v( _v ), w( _w ); + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = pow( u[ i ], 2.0 ); + w[ i ] = pow( u[ i ], 3.0 ); + } + + EXPECT_EQ( pow( u, 2.0 ), v ); + EXPECT_EQ( pow( u, 3.0 ), w ); +} + +TYPED_TEST( VectorTest, floor ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = floor( 
u[ i ] ); + } + + EXPECT_EQ( floor( u ), v ); +} + +TYPED_TEST( VectorTest, ceil ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = ceil( u[ i ] ); + } + + EXPECT_EQ( ceil( u ), v ); +} + +TYPED_TEST( VectorTest, acos ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u[ i ] = ( double )( i - size / 2 ) / ( double ) size; + v[ i ] = acos( u[ i ] ); + } + + EXPECT_EQ( acos( u ), v ); +} + +TYPED_TEST( VectorTest, asin ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u[ i ] = ( double ) ( i - size / 2 ) / ( double ) size; + v[ i ] = asin( u[ i ] ); + } + + EXPECT_EQ( asin( u ), v ); +} + +TYPED_TEST( VectorTest, atan ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = atan( u[ i ] ); + } + + EXPECT_EQ( atan( u ), v ); +} + +TYPED_TEST( VectorTest, cosh ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = cosh( u[ i ] ); + } + + // EXPECT_EQ( cosh( u ), v ) does 
not work here for float, maybe because + // of some fast-math optimization + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( cosh( u )[ i ], v[ i ], 1.0e-6 ); +} + +TYPED_TEST( VectorTest, tanh ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = tanh( u[ i ] ); + } + + EXPECT_EQ( tanh( u ), v ); +} + +TYPED_TEST( VectorTest, log ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u[ i ] = i + 1; + v[ i ] = log( u[ i ] ); + } + + EXPECT_EQ( log( u ), v ); +} + +TYPED_TEST( VectorTest, log10 ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u[ i ] = i + 1; + v[ i ] = log10( u[ i ] ); + } + + // EXPECT_EQ( log10( u ), v ) does not work here for float, maybe because + // of some fast-math optimization + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( log10( u )[ i ], v[ i ], 1.0e-6 ); +} + +TYPED_TEST( VectorTest, log2 ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u[ i ] = i + 1; + v[ i ] = log2( u[ i ] ); + } + + EXPECT_EQ( log2( u ), v ); +} + +TYPED_TEST( VectorTest, exp ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + 
VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = exp( u[ i ] ); + } + + EXPECT_EQ( exp( u ), v ); +} + +TYPED_TEST( VectorTest, sign ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u[ i ] = i - size / 2; + v[ i ] = sign( u[ i ] ); + } + + EXPECT_EQ( sign( u ), v ); +} + // TODO: test prefix sum with custom begin and end parameters TEST( VectorSpecialCasesTest, sumOfBoolVector ) @@ -764,6 +1080,15 @@ TEST( VectorSpecialCasesTest, assignmentThroughView ) using VectorType = Containers::Vector< int, Devices::Host >; using ViewType = VectorView< int, Devices::Host >; + using T = decltype(std::declval< VectorType >()[0]); + //:T t( 0 ); + + static_assert( Algorithms::Details::HasGetArrayData< Array< int, Devices::Host> >::value == true, "Subscript operator detection by SFINAE does not work for Vector." ); + static_assert( Algorithms::Details::HasSubscriptOperator< StaticVector< 3, double> >::value, "Subscript operator detection by SFINAE does not work for Vector." ); + static_assert( Algorithms::Details::HasSubscriptOperator< Array< int, Devices::Host> >::value == true, "Subscript operator detection by SFINAE does not work for Vector." ); + static_assert( Algorithms::Details::HasSubscriptOperator< VectorType >::value, "Subscript operator detection by SFINAE does not work for Vector." ); + static_assert( Algorithms::Details::HasSubscriptOperator< ViewType >::value, "Subscript operator detection by SFINAE does not work for VectorView." 
); + VectorType u( 100 ), v( 100 ); ViewType u_view( u ), v_view( v ); @@ -841,6 +1166,10 @@ TEST( VectorSpecialCasesTest, defaultConstructors ) EXPECT_EQ( v_view.getData(), a_view.getData() ); } + + + + #endif // HAVE_GTEST -- GitLab From 0ed6586d89edc3d8a621d6eca8e75c158cd4c6d2 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Mon, 29 Apr 2019 16:13:37 +0200 Subject: [PATCH 17/93] Fixing expression templates for VectorView. --- src/TNL/Containers/Algorithms/VectorAssignment.h | 9 +++++---- src/TNL/Containers/StaticVector_impl.h | 4 ++-- src/TNL/Containers/Vector.h | 4 +++- src/TNL/Containers/Vector.hpp | 12 +++++++++++- src/TNL/Containers/VectorView_impl.h | 1 + src/UnitTests/Containers/StaticVectorTest.cpp | 2 ++ src/UnitTests/Containers/VectorTest.h | 14 +++++++------- 7 files changed, 31 insertions(+), 15 deletions(-) diff --git a/src/TNL/Containers/Algorithms/VectorAssignment.h b/src/TNL/Containers/Algorithms/VectorAssignment.h index 0379622b7..a87d0bc1b 100644 --- a/src/TNL/Containers/Algorithms/VectorAssignment.h +++ b/src/TNL/Containers/Algorithms/VectorAssignment.h @@ -21,6 +21,7 @@ namespace Algorithms { namespace Details { /** * SFINAE for checking if T has getSize method + * TODO: We should better test operator[] but we need to know the indexing type. 
*/ template< typename T > class HasSubscriptOperator @@ -29,7 +30,7 @@ private: typedef char YesType[1]; typedef char NoType[2]; - template< typename C > static YesType& test( decltype(std::declval< C >()[0]) ); + template< typename C > static YesType& test( decltype(std::declval< C >().getSize() ) ); template< typename C > static NoType& test(...); public: @@ -68,13 +69,13 @@ struct VectorAssignment< Vector, T, true > using RealType = typename Vector::RealType; using DeviceType = typename Vector::DeviceType; using IndexType = typename Vector::IndexType; - + RealType* data = v.getData(); auto ass = [=] __cuda_callable__ ( IndexType i ) { data[ i ] = t[ i ]; }; - ParallelFor< DeviceType >::exec( 0, v.getSize(), ass ); + ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), ass ); }; }; @@ -98,7 +99,7 @@ struct VectorAssignment< Vector, T, false > for( decltype( v.getSize() ) i = 0; i < v.getSize(); i ++ ) v[ i ] = t; }; - + static void assign( Vector& v, const T& t ) { TNL_ASSERT_EQ( v.getSize(), t.getSize(), "The sizes of the vectors must be equal." ); diff --git a/src/TNL/Containers/StaticVector_impl.h b/src/TNL/Containers/StaticVector_impl.h index 4f92c56aa..763fb1e3e 100644 --- a/src/TNL/Containers/StaticVector_impl.h +++ b/src/TNL/Containers/StaticVector_impl.h @@ -58,7 +58,7 @@ template< int Size, typename Real > StaticVector< Size, Real >::StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ) { static_assert( Expressions::BinaryExpressionTemplate< T1, T2, Operation >::isStatic(), "Attempt to assign non-static expression to static vector." 
); - Algorithms::VectorAssignment< StaticVector< Size, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assign( *this, op ); + Algorithms::VectorAssignment< StaticVector< Size, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, op ); }; template< int Size, @@ -69,7 +69,7 @@ __cuda_callable__ StaticVector< Size, Real >::StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ) { static_assert( Expressions::UnaryExpressionTemplate< T, Operation >::isStatic(), "Attempt to assign non-static expression to static vector." ); - Algorithms::VectorAssignment< StaticVector< Size, Real >, Expressions::UnaryExpressionTemplate< T, Operation > >::assign( *this, op ); + Algorithms::VectorAssignment< StaticVector< Size, Real >, Expressions::UnaryExpressionTemplate< T, Operation > >::assignStatic( *this, op ); }; template< int Size, typename Real > diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h index 2c3a88cd4..782f03ff7 100644 --- a/src/TNL/Containers/Vector.h +++ b/src/TNL/Containers/Vector.h @@ -168,11 +168,13 @@ public: void addElement( const IndexType i, const RealType& value, const Scalar thisElementMultiplicator ); - + __cuda_callable__ Real& operator[]( const Index& i ); __cuda_callable__ const Real& operator[]( const Index& i ) const; + Vector& operator = ( const Vector& v ); + template< typename VectorExpression > Vector& operator = ( const VectorExpression& expression ); diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index cad3b758d..271d43e71 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -208,6 +208,16 @@ operator[]( const Index& i ) const return this->operator[]( i ); } +template< typename Real, + typename Device, + typename Index > +Vector< Real, Device, Index >& +Vector< Real, Device, Index >::operator = ( const Vector& vector ) +{ + Array< Real, Device, Index >::operator=( vector ); + return *this; 
+} + template< typename Real, typename Device, typename Index > @@ -216,9 +226,9 @@ Vector< Real, Device, Index >& Vector< Real, Device, Index >::operator = ( const VectorExpression& expression ) { Algorithms::VectorAssignment< Vector< Real, Device, Index >, VectorExpression >::assign( *this, expression ); + return *this; } - template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Containers/VectorView_impl.h b/src/TNL/Containers/VectorView_impl.h index 0a4c5b590..4bb1d8f7f 100644 --- a/src/TNL/Containers/VectorView_impl.h +++ b/src/TNL/Containers/VectorView_impl.h @@ -158,6 +158,7 @@ VectorView< Real, Device, Index >& VectorView< Real, Device, Index >::operator = ( const VectorExpression& expression ) { Algorithms::VectorAssignment< VectorView< Real, Device, Index >, VectorExpression >::assign( *this, expression ); + return *this; } template< typename Real, diff --git a/src/UnitTests/Containers/StaticVectorTest.cpp b/src/UnitTests/Containers/StaticVectorTest.cpp index c092a6b97..1e1d00d7d 100644 --- a/src/UnitTests/Containers/StaticVectorTest.cpp +++ b/src/UnitTests/Containers/StaticVectorTest.cpp @@ -94,6 +94,8 @@ TYPED_TEST( StaticVectorTest, operators ) using VectorType = typename TestFixture::VectorType; constexpr int size = VectorType::size; + static_assert( Algorithms::Details::HasSubscriptOperator< VectorType >::value, "Subscript operator detection by SFINAE does not work for StaticVector." ); + VectorType u1( 1 ), u2( 2 ), u3( 3 ); u1 += u2; diff --git a/src/UnitTests/Containers/VectorTest.h b/src/UnitTests/Containers/VectorTest.h index 4bc4d9229..93abef4c8 100644 --- a/src/UnitTests/Containers/VectorTest.h +++ b/src/UnitTests/Containers/VectorTest.h @@ -150,8 +150,14 @@ TYPED_TEST( VectorTest, constructors ) using VectorType = typename TestFixture::VectorType; const int size = VECTOR_TEST_SIZE; + // TODO: Does not work yet. 
+ /*VectorType empty_u; + VectorType empty_v( empty_u ); + EXPECT_EQ( empty_u.getSize(), 0 ); + EXPECT_EQ( empty_v.getSize(), 0 );*/ + VectorType u( size ); - EXPECT_EQ( u.getSize(), 0 ); + EXPECT_EQ( u.getSize(), size ); VectorType v( 10 ); EXPECT_EQ( v.getSize(), 10 ); @@ -1080,12 +1086,6 @@ TEST( VectorSpecialCasesTest, assignmentThroughView ) using VectorType = Containers::Vector< int, Devices::Host >; using ViewType = VectorView< int, Devices::Host >; - using T = decltype(std::declval< VectorType >()[0]); - //:T t( 0 ); - - static_assert( Algorithms::Details::HasGetArrayData< Array< int, Devices::Host> >::value == true, "Subscript operator detection by SFINAE does not work for Vector." ); - static_assert( Algorithms::Details::HasSubscriptOperator< StaticVector< 3, double> >::value, "Subscript operator detection by SFINAE does not work for Vector." ); - static_assert( Algorithms::Details::HasSubscriptOperator< Array< int, Devices::Host> >::value == true, "Subscript operator detection by SFINAE does not work for Vector." ); static_assert( Algorithms::Details::HasSubscriptOperator< VectorType >::value, "Subscript operator detection by SFINAE does not work for Vector." ); static_assert( Algorithms::Details::HasSubscriptOperator< ViewType >::value, "Subscript operator detection by SFINAE does not work for VectorView." ); -- GitLab From 19cd813bc18596cf6a3464a5ab52fa3efb4d9c50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 29 Apr 2019 20:58:35 +0200 Subject: [PATCH 18/93] Fixes of expression templates for VectorView. 
--- .../Containers/Algorithms/VectorAssignment.h | 3 +- src/TNL/Containers/ArrayView.hpp | 2 +- src/TNL/Containers/Expressions/Comparison.h | 159 ++++++++++++++++++ .../Expressions/ExpressionTemplates.h | 2 +- .../Containers/Expressions/StaticComparison.h | 81 --------- src/TNL/Containers/Partitioner.h | 4 +- src/TNL/Containers/StaticVectorExpressions.h | 4 +- src/TNL/Containers/Vector.hpp | 4 +- src/TNL/Containers/VectorViewExpressions.h | 38 ++--- src/TNL/Containers/VectorView_impl.h | 56 ------ src/UnitTests/Containers/ArrayViewTest.h | 12 +- src/UnitTests/Containers/VectorTest.h | 17 +- 12 files changed, 208 insertions(+), 174 deletions(-) create mode 100644 src/TNL/Containers/Expressions/Comparison.h delete mode 100644 src/TNL/Containers/Expressions/StaticComparison.h diff --git a/src/TNL/Containers/Algorithms/VectorAssignment.h b/src/TNL/Containers/Algorithms/VectorAssignment.h index a87d0bc1b..5cde22f18 100644 --- a/src/TNL/Containers/Algorithms/VectorAssignment.h +++ b/src/TNL/Containers/Algorithms/VectorAssignment.h @@ -102,7 +102,6 @@ struct VectorAssignment< Vector, T, false > static void assign( Vector& v, const T& t ) { - TNL_ASSERT_EQ( v.getSize(), t.getSize(), "The sizes of the vectors must be equal." 
); using RealType = typename Vector::RealType; using DeviceType = typename Vector::DeviceType; using IndexType = typename Vector::IndexType; @@ -112,7 +111,7 @@ struct VectorAssignment< Vector, T, false > { data[ i ] = t; }; - ParallelFor< DeviceType >::exec( 0, v.getSize(), ass ); + ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), ass ); } }; diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index 83cc81db2..f3a0b4ecf 100644 --- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -127,7 +127,7 @@ template< typename Value, template< typename T > ArrayView< Value, Device, Index >& ArrayView< Value, Device, Index >:: -operator = ( const T& data ) +operator=( const T& data ) { Algorithms::ArrayAssignment< ArrayView, T >::assign( *this, data ); return *this; diff --git a/src/TNL/Containers/Expressions/Comparison.h b/src/TNL/Containers/Expressions/Comparison.h new file mode 100644 index 000000000..7265adb79 --- /dev/null +++ b/src/TNL/Containers/Expressions/Comparison.h @@ -0,0 +1,159 @@ +/*************************************************************************** + StaticComparison.h - description + ------------------- + begin : Apr 19, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include + +namespace TNL { + namespace Containers { + namespace Expressions { + +template< typename T1, + typename T2 > +__cuda_callable__ +bool StaticComparisonEQ( const T1& a, const T2& b ) +{ + TNL_ASSERT_EQ( a.getSize(), b.getSize(), "Sizes of expressions to be compared do not fit." ); + for( int i = 0; i < a.getSize(); i++ ) + if( a[ i ] != b[ i ] ) + return false; + return true; +} + +template< typename T1, + typename T2 > +__cuda_callable__ +bool StaticComparisonNE( const T1& a, const T2& b ) +{ + return ! 
StaticComparisonEQ( a, b ); +} + +template< typename T1, + typename T2 > +__cuda_callable__ +bool StaticComparisonGT( const T1& a, const T2& b ) +{ + TNL_ASSERT_EQ( a.getSize(), b.getSize(), "Sizes of expressions to be compared do not fit." ); + for( int i = 0; i < a.getSize(); i++ ) + if( a[ i ] <= b[ i ] ) + return false; + return true; +} + +template< typename T1, + typename T2 > +__cuda_callable__ +bool StaticComparisonLE( const T1& a, const T2& b ) +{ + return ! StaticComparisonGT( a, b ); +} + +template< typename T1, + typename T2 > +__cuda_callable__ +bool StaticComparisonLT( const T1& a, const T2& b ) +{ + TNL_ASSERT_EQ( a.getSize(), b.getSize(), "Sizes of expressions to be compared do not fit." ); + for( int i = 0; i < a.getSize(); i++ ) + if( a[ i ] >= b[ i ] ) + return false; + return true; +} + +template< typename T1, + typename T2 > +__cuda_callable__ +bool StaticComparisonGE( const T1& a, const T2& b ) +{ + return ! StaticComparisonLT( a, b ); +} + +//// +// Non-static comparison +template< typename T1, + typename T2 > +__cuda_callable__ +bool ComparisonEQ( const T1& a, const T2& b ) +{ + if( a.getSize() != b.getSize() ) + return false; + if( a.getSize() == 0 ) + return true; + + using DeviceType = typename T1::DeviceType; + using IndexType = typename T1::IndexType; + + auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return ( a[ i ] == b[ i ] ); }; + auto reduction = [=] __cuda_callable__ ( bool& a, const bool& b ) { a &= b; }; + auto volatileReduction = [=] __cuda_callable__ ( volatile bool& a, volatile bool& b ) { a &= b; }; + return Algorithms::Reduction< DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, true ); +} + +template< typename T1, + typename T2 > +__cuda_callable__ +bool ComparisonNE( const T1& a, const T2& b ) +{ + return ! 
ComparisonEQ( a, b ); +} + +template< typename T1, + typename T2 > +__cuda_callable__ +bool ComparisonGT( const T1& a, const T2& b ) +{ + TNL_ASSERT_EQ( a.getSize(), b.getSize(), "Sizes of expressions to be compared do not fit." ); + + using DeviceType = typename T1::DeviceType; + using IndexType = typename T1::IndexType; + + auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return ( a[ i ] > b[ i ] ); }; + auto reduction = [=] __cuda_callable__ ( bool& a, const bool& b ) { a &= b; }; + auto volatileReduction = [=] __cuda_callable__ ( volatile bool& a, volatile bool& b ) { a &= b; }; + return Algorithms::Reduction< DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, true ); +} + +template< typename T1, + typename T2 > +__cuda_callable__ +bool ComparisonLE( const T1& a, const T2& b ) +{ + return ! ComparisonGT( a, b ); +} + +template< typename T1, + typename T2 > +__cuda_callable__ +bool ComparisonLT( const T1& a, const T2& b ) +{ + TNL_ASSERT_EQ( a.getSize(), b.getSize(), "Sizes of expressions to be compared do not fit." ); + + using DeviceType = typename T1::DeviceType; + using IndexType = typename T1::IndexType; + + auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { return ( a[ i ] < b[ i ] ); }; + auto reduction = [=] __cuda_callable__ ( bool& a, const bool& b ) { a &= b; }; + auto volatileReduction = [=] __cuda_callable__ ( volatile bool& a, volatile bool& b ) { a &= b; }; + return Algorithms::Reduction< DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, true ); +} + +template< typename T1, + typename T2 > +__cuda_callable__ +bool ComparisonGE( const T1& a, const T2& b ) +{ + return ! 
ComparisonLT( a, b ); +} + + } //namespace Expressions + } // namespace Containers +} // namespace TNL diff --git a/src/TNL/Containers/Expressions/ExpressionTemplates.h b/src/TNL/Containers/Expressions/ExpressionTemplates.h index cdc998684..b8376b71d 100644 --- a/src/TNL/Containers/Expressions/ExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/ExpressionTemplates.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include namespace TNL { namespace Containers { diff --git a/src/TNL/Containers/Expressions/StaticComparison.h b/src/TNL/Containers/Expressions/StaticComparison.h deleted file mode 100644 index 3f254fbd0..000000000 --- a/src/TNL/Containers/Expressions/StaticComparison.h +++ /dev/null @@ -1,81 +0,0 @@ -/*************************************************************************** - StaticComparison.h - description - ------------------- - begin : Apr 19, 2019 - copyright : (C) 2019 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include - -namespace TNL { - namespace Containers { - namespace Expressions { - -template< typename T1, - typename T2 > -__cuda_callable__ -bool StaticComparisonEQ( const T1& a, const T2& b ) -{ - TNL_ASSERT_EQ( a.getSize(), b.getSize(), "Sizes of expressions to be compared do not fit." ); - for( int i = 0; i < a.getSize(); i++ ) - if( a[ i ] != b[ i ] ) - return false; - return true; -} - -template< typename T1, - typename T2 > -__cuda_callable__ -bool StaticComparisonNE( const T1& a, const T2& b ) -{ - return ! StaticComparisonEQ( a, b ); -} - -template< typename T1, - typename T2 > -__cuda_callable__ -bool StaticComparisonGT( const T1& a, const T2& b ) -{ - TNL_ASSERT_EQ( a.getSize(), b.getSize(), "Sizes of expressions to be compared do not fit." 
); - for( int i = 0; i < a.getSize(); i++ ) - if( a[ i ] <= b[ i ] ) - return false; - return true; -} - -template< typename T1, - typename T2 > -__cuda_callable__ -bool StaticComparisonLE( const T1& a, const T2& b ) -{ - return ! StaticComparisonGT( a, b ); -} - -template< typename T1, - typename T2 > -__cuda_callable__ -bool StaticComparisonLT( const T1& a, const T2& b ) -{ - TNL_ASSERT_EQ( a.getSize(), b.getSize(), "Sizes of expressions to be compared do not fit." ); - for( int i = 0; i < a.getSize(); i++ ) - if( a[ i ] >= b[ i ] ) - return false; - return true; -} - -template< typename T1, - typename T2 > -__cuda_callable__ -bool StaticComparisonGE( const T1& a, const T2& b ) -{ - return ! StaticComparisonLT( a, b ); -} - - } //namespace Expressions - } // namespace Containers -} // namespace TNL \ No newline at end of file diff --git a/src/TNL/Containers/Partitioner.h b/src/TNL/Containers/Partitioner.h index b886c40a6..f0b507475 100644 --- a/src/TNL/Containers/Partitioner.h +++ b/src/TNL/Containers/Partitioner.h @@ -31,8 +31,8 @@ public: if( group != Communicator::NullGroup ) { const int rank = Communicator::GetRank( group ); const int partitions = Communicator::GetSize( group ); - const Index begin = min( globalSize, rank * globalSize / partitions ); - const Index end = min( globalSize, (rank + 1) * globalSize / partitions ); + const Index begin = TNL::min( globalSize, rank * globalSize / partitions ); + const Index end = TNL::min( globalSize, (rank + 1) * globalSize / partitions ); return SubrangeType( begin, end ); } else diff --git a/src/TNL/Containers/StaticVectorExpressions.h b/src/TNL/Containers/StaticVectorExpressions.h index 4d7eefd94..95d52d024 100644 --- a/src/TNL/Containers/StaticVectorExpressions.h +++ b/src/TNL/Containers/StaticVectorExpressions.h @@ -12,9 +12,7 @@ #include #include -#include - -#include "Expressions/StaticComparison.h" +#include namespace TNL { namespace Containers { diff --git a/src/TNL/Containers/Vector.hpp 
b/src/TNL/Containers/Vector.hpp index 271d43e71..14c47b5eb 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -194,7 +194,7 @@ inline Real& Vector< Real, Device, Index >:: operator[]( const Index& i ) { - return this->operator[]( i ); + return Array< Real, Device, Index >::operator[]( i ); } template< typename Real, @@ -205,7 +205,7 @@ inline const Real& Vector< Real, Device, Index >:: operator[]( const Index& i ) const { - return this->operator[]( i ); + return Array< Real, Device, Index >::operator[]( i ); } template< typename Real, diff --git a/src/TNL/Containers/VectorViewExpressions.h b/src/TNL/Containers/VectorViewExpressions.h index c44b72712..f807a7ba2 100644 --- a/src/TNL/Containers/VectorViewExpressions.h +++ b/src/TNL/Containers/VectorViewExpressions.h @@ -12,7 +12,7 @@ #include #include -#include +#include namespace TNL { namespace Containers { @@ -179,21 +179,21 @@ template< typename Real, typename Device, typename Index, typename ET > __cuda_callable__ bool operator==( const VectorView< Real, Device, Index >& a, const ET& b ) { - return Expressions::StaticComparisonEQ( a, b ); + return Expressions::ComparisonEQ( a, b ); } template< typename ET, typename Real, typename Device, typename Index > __cuda_callable__ bool operator==( const ET& a, const VectorView< Real, Device, Index >& b ) { - return Expressions::StaticComparisonEQ( a, b ); + return Expressions::ComparisonEQ( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ bool operator==( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) { - return Expressions::StaticComparisonEQ( a, b ); + return Expressions::ComparisonEQ( a, b ); } //// @@ -202,21 +202,21 @@ template< typename Real, typename Device, typename Index, typename ET > __cuda_callable__ bool operator!=( const VectorView< Real, Device, Index >& a, const ET& b ) { - return Expressions::StaticComparisonNE( a, b ); + 
return Expressions::ComparisonNE( a, b ); } template< typename ET, typename Real, typename Device, typename Index > __cuda_callable__ bool operator!=( const ET& a, const VectorView< Real, Device, Index >& b ) { - return Expressions::StaticComparisonNE( a, b ); + return Expressions::ComparisonNE( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ bool operator!=( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) { - return Expressions::StaticComparisonNE( a, b ); + return Expressions::ComparisonNE( a, b ); } //// @@ -225,21 +225,21 @@ template< typename Real, typename Device, typename Index, typename ET > __cuda_callable__ bool operator<( const VectorView< Real, Device, Index >& a, const ET& b ) { - return Expressions::StaticComparisonLT( a, b ); + return Expressions::ComparisonLT( a, b ); } template< typename ET, typename Real, typename Device, typename Index > __cuda_callable__ bool operator<( const ET& a, const VectorView< Real, Device, Index >& b ) { - return Expressions::StaticComparisonLT( a, b ); + return Expressions::ComparisonLT( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ bool operator<( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) { - return Expressions::StaticComparisonLT( a, b ); + return Expressions::ComparisonLT( a, b ); } //// @@ -248,21 +248,21 @@ template< typename Real, typename Device, typename Index, typename ET > __cuda_callable__ bool operator<=( const VectorView< Real, Device, Index >& a, const ET& b ) { - return Expressions::StaticComparisonLE( a, b ); + return Expressions::ComparisonLE( a, b ); } template< typename ET, typename Real, typename Device, typename Index > __cuda_callable__ bool operator<=( const ET& a, const VectorView< Real, Device, Index >& b ) { - return Expressions::StaticComparisonLE( a, b ); + return Expressions::ComparisonLE( 
a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ bool operator<=( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) { - return Expressions::StaticComparisonLE( a, b ); + return Expressions::ComparisonLE( a, b ); } //// @@ -271,21 +271,21 @@ template< typename Real, typename Device, typename Index, typename ET > __cuda_callable__ bool operator>( const VectorView< Real, Device, Index >& a, const ET& b ) { - return Expressions::StaticComparisonGT( a, b ); + return Expressions::ComparisonGT( a, b ); } template< typename ET, typename Real, typename Device, typename Index > __cuda_callable__ bool operator>( const ET& a, const VectorView< Real, Device, Index >& b ) { - return Expressions::StaticComparisonGT( a, b ); + return Expressions::ComparisonGT( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ bool operator>( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) { - return Expressions::StaticComparisonGT( a, b ); + return Expressions::ComparisonGT( a, b ); } //// @@ -294,21 +294,21 @@ template< typename Real, typename Device, typename Index, typename ET > __cuda_callable__ bool operator>=( const VectorView< Real, Device, Index >& a, const ET& b ) { - return Expressions::StaticComparisonGE( a, b ); + return Expressions::ComparisonGE( a, b ); } template< typename ET, typename Real, typename Device, typename Index > __cuda_callable__ bool operator>=( const ET& a, const VectorView< Real, Device, Index >& b ) { - return Expressions::StaticComparisonGE( a, b ); + return Expressions::ComparisonGE( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ bool operator>=( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) { - return Expressions::StaticComparisonGE( a, b ); + return 
Expressions::ComparisonGE( a, b ); } //// diff --git a/src/TNL/Containers/VectorView_impl.h b/src/TNL/Containers/VectorView_impl.h index 4bb1d8f7f..3d53a77ff 100644 --- a/src/TNL/Containers/VectorView_impl.h +++ b/src/TNL/Containers/VectorView_impl.h @@ -73,62 +73,6 @@ getType() TNL::getType< Index >() + " >"; } - -template< typename Real, - typename Device, - typename Index > -template< typename VectorOperationType > -void -VectorView< Real, Device, Index >:: -evaluate( const VectorOperationType& vo ) -{ - Real* dt = this->data; - auto assign = [=] __cuda_callable__ ( Index i ) - { - dt[ i ] = vo[ i ]; - }; - ParallelFor< DeviceType >::exec( 0, this->getSize(), assign ); -} - -template< typename Real, - typename Device, - typename Index > -template< typename VectorOperationType > -void -VectorView< Real, Device, Index >:: -evaluateFor( const VectorOperationType& vo ) -{ - if( std::is_same< DeviceType, Devices::Host >::value ) - { - for( int i = 0; i < this->getSize(); i++ ) - { - this->data[ i ] = vo[ i ]; - } - } - /* - if( std::is_same< DeviceType, Devices::Cuda >::value ) - { - Real* dt; - VectorOperationType* expression; - - cudaMallocManaged(&dt, this->getSize * sizeof(Real)); - cudaMallocManaged(&expression, this->getSize * sizeof(Real)); - - dt = this->data; - expression = vo; - - expressionTemplatesKernel<<<(this->getSize()+255)/256, 256>>>( dt, this->getSize(), expression ); - TNL_CHECK_CUDA_DEVICE; - //cudaDeviceSynchronize(); - - //error check - - cudaFree(dt); - cudaFree(expression); - } - */ -} - template< typename Real, typename Device, typename Index > diff --git a/src/UnitTests/Containers/ArrayViewTest.h b/src/UnitTests/Containers/ArrayViewTest.h index 914d5a581..6a128db07 100644 --- a/src/UnitTests/Containers/ArrayViewTest.h +++ b/src/UnitTests/Containers/ArrayViewTest.h @@ -10,7 +10,7 @@ #pragma once -#ifdef HAVE_GTEST +#ifdef HAVE_GTEST #include #include @@ -174,10 +174,12 @@ TYPED_TEST( ArrayViewTest, constructors ) EXPECT_EQ( b_view.getData(), 
b.getData() ); ConstViewType const_a_view = a.getConstView(); EXPECT_EQ( const_a_view.getData(), a.getData() ); + EXPECT_EQ( const_a_view.getSize(), a.getSize() ); // test initialization of const view by non-const view ConstViewType const_b_view( b_view ); - EXPECT_EQ( const_b_view.getData(), b_view.getData() ); + EXPECT_EQ( const_b_view.getData(), b.getData() ); + EXPECT_EQ( const_b_view.getSize(), b.getSize() ); } TYPED_TEST( ArrayViewTest, bind ) @@ -317,7 +319,7 @@ void ArrayViewEvaluateTest( ArrayType& u ) { return 3 * i % 4; }; - + v.evaluate( f ); for( int i = 0; i < 10; i++ ) { @@ -373,8 +375,8 @@ TYPED_TEST( ArrayViewTest, containsOnlyValue ) TYPED_TEST( ArrayViewTest, comparisonOperator ) { - using ArrayType = typename TestFixture::ArrayType; - using ViewType = typename TestFixture::ViewType; + using ArrayType = Vector< double >; //typename TestFixture::ArrayType; + using ViewType = VectorView< double >; //typename TestFixture::ViewType; ArrayType a( 10 ), b( 10 ); typename ArrayType::HostType a_host( 10 ); diff --git a/src/UnitTests/Containers/VectorTest.h b/src/UnitTests/Containers/VectorTest.h index 93abef4c8..1c9586013 100644 --- a/src/UnitTests/Containers/VectorTest.h +++ b/src/UnitTests/Containers/VectorTest.h @@ -909,13 +909,15 @@ TYPED_TEST( VectorTest, cosh ) { using VectorType = typename TestFixture::VectorType; using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; const int size = VECTOR_TEST_SIZE; + RealType h = 2.0 / ( RealType ) size; VectorType _u( size ), _v( size ); ViewType u( _u ), v( _v ); for( int i = 0; i < size; i++ ) { - u[ i ] = i - size / 2; + u[ i ] = i * h - ( RealType ) 1.0; v[ i ] = cosh( u[ i ] ); } @@ -1173,4 +1175,15 @@ TEST( VectorSpecialCasesTest, defaultConstructors ) #endif // HAVE_GTEST -#include "../main.h" +#include "../GtestMissingError.h" +int main( int argc, char* argv[] ) +{ + //Test(); + //return 0; +#ifdef HAVE_GTEST + ::testing::InitGoogleTest( &argc, argv ); + 
return RUN_ALL_TESTS(); +#else + throw GtestMissingError(); +#endif +} -- GitLab From 957e577d1e3fa682fb9a6f48001add7c63b27bc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 30 Apr 2019 00:04:32 +0200 Subject: [PATCH 19/93] Fixing Vector assignment. --- src/TNL/Containers/Algorithms/VectorAssignment.h | 2 +- src/TNL/Containers/Vector.h | 3 +++ src/TNL/Containers/Vector.hpp | 11 +++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/TNL/Containers/Algorithms/VectorAssignment.h b/src/TNL/Containers/Algorithms/VectorAssignment.h index 5cde22f18..bc30bd495 100644 --- a/src/TNL/Containers/Algorithms/VectorAssignment.h +++ b/src/TNL/Containers/Algorithms/VectorAssignment.h @@ -63,7 +63,7 @@ struct VectorAssignment< Vector, T, true > v[ i ] = t[ i ]; }; - static void assign( Vector& v, const T& t ) + static void assign( Vector& v, const T t ) { TNL_ASSERT_EQ( v.getSize(), t.getSize(), "The sizes of the vectors must be equal." ); using RealType = typename Vector::RealType; diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h index 782f03ff7..d1a2521b4 100644 --- a/src/TNL/Containers/Vector.h +++ b/src/TNL/Containers/Vector.h @@ -174,6 +174,9 @@ public: __cuda_callable__ const Real& operator[]( const Index& i ) const; Vector& operator = ( const Vector& v ); + + template< typename Real_, typename Device_, typename Index_ > + Vector& operator = ( const Vector< Real_, Device_, Index_ >& v ); template< typename VectorExpression > Vector& operator = ( const VectorExpression& expression ); diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index 14c47b5eb..ab5ceb697 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -241,6 +241,17 @@ operator -= ( const VectorT& vector ) return *this; } +template< typename Real, + typename Device, + typename Index > + template< typename Real_, typename Device_, typename Index_ > +Vector< Real, Device, Index >& 
+Vector< Real, Device, Index >::operator = ( const Vector< Real_, Device_, Index_ >& vector ) +{ + Array< Real, Device, Index >::operator=( vector ); + return *this; +} + template< typename Real, typename Device, typename Index > -- GitLab From c05c4da2c1dee955195c3ec24065601d15742994 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 30 Apr 2019 08:46:20 +0200 Subject: [PATCH 20/93] Fixing Vector with ET. --- src/TNL/Containers/Vector.h | 3 +++ src/TNL/Containers/Vector.hpp | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h index d1a2521b4..d3177235f 100644 --- a/src/TNL/Containers/Vector.h +++ b/src/TNL/Containers/Vector.h @@ -80,6 +80,9 @@ public: */ explicit Vector( const Vector& vector ); + template< typename Real_, typename Device_, typename Index_ > + Vector( const Vector< Real_, Device_, Index_ >& vector ); + /** * \brief Bind constructor . * diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index ab5ceb697..97b5f17fe 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -52,6 +52,17 @@ Vector( const Vector< Real, Device, Index >& vector ) { } +template< typename Real, + typename Device, + typename Index > + template< typename Real_, typename Device_, typename Index_ > +Vector< Real, Device, Index >:: +Vector( const Vector< Real_, Device_, Index_ >& vector ) +: Array< Real, Device, Index >( vector ) +{ +} + + template< typename Real, typename Device, typename Index > -- GitLab From 8cbc1e8a8c5702db627f153aff9fdb7a23be6d6a Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 30 Apr 2019 20:13:42 +0200 Subject: [PATCH 21/93] Fixing ET with CUDA. 
--- src/TNL/Containers/VectorView.h | 6 ++++ src/TNL/Containers/VectorView_impl.h | 23 ++++++++++++++ src/UnitTests/Containers/VectorTest.h | 43 +++++++++++++++------------ 3 files changed, 53 insertions(+), 19 deletions(-) diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index 5d3197494..6610f93f1 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -113,6 +113,12 @@ public: template< typename Scalar > VectorView& operator/=( Scalar c ); + template< typename Real_, typename Device_, typename Index_ > + bool operator==( const VectorView< Real_, Device_, Index_ >& v ); + + template< typename Real_, typename Device_, typename Index_ > + bool operator!=( const VectorView< Real_, Device_, Index_ >& v ); + NonConstReal max() const; NonConstReal min() const; diff --git a/src/TNL/Containers/VectorView_impl.h b/src/TNL/Containers/VectorView_impl.h index 3d53a77ff..3efcbe148 100644 --- a/src/TNL/Containers/VectorView_impl.h +++ b/src/TNL/Containers/VectorView_impl.h @@ -153,6 +153,29 @@ operator/=( Scalar c ) return *this; } +template< typename Real, + typename Device, + typename Index > + template< typename Real_, typename Device_, typename Index_ > +bool +VectorView< Real, Device, Index >:: +operator==( const VectorView< Real_, Device_, Index_ >& v ) +{ + return ArrayView< Real, Device, Index >::operator ==( v ); +} + +template< typename Real, + typename Device, + typename Index > + template< typename Real_, typename Device_, typename Index_ > +bool +VectorView< Real, Device, Index >:: +operator!=( const VectorView< Real_, Device_, Index_ >& v ) +{ + return !ArrayView< Real, Device, Index >::operator ==( v ); +} + + template< typename Real, typename Device, typename Index > diff --git a/src/UnitTests/Containers/VectorTest.h b/src/UnitTests/Containers/VectorTest.h index 1c9586013..737b07965 100644 --- a/src/UnitTests/Containers/VectorTest.h +++ b/src/UnitTests/Containers/VectorTest.h @@ -726,12 +726,16 @@ 
TYPED_TEST( VectorTest, sin ) for( int i = 0; i < size; i++ ) { u[ i ] = i - size / 2; - v[ i ] = sin( u[ i ] ); + v[ i ] = TNL::sin( u[ i ] ); } EXPECT_EQ( sin( u ), v ); } +//// +// Performing all test leads to extremely long compilation time with nvcc +// TODO: Try to fix it somehow +/* TYPED_TEST( VectorTest, cos ) { using VectorType = typename TestFixture::VectorType; @@ -743,7 +747,7 @@ TYPED_TEST( VectorTest, cos ) for( int i = 0; i < size; i++ ) { u[ i ] = i - size / 2; - v[ i ] = cos( u[ i ] ); + v[ i ] = TNL::cos( u[ i ] ); } EXPECT_EQ( cos( u ), v ); @@ -760,7 +764,7 @@ TYPED_TEST( VectorTest, tan ) for( int i = 0; i < size; i++ ) { u[ i ] = i - size / 2; - v[ i ] = tan( u[ i ] ); + v[ i ] = TNL::tan( u[ i ] ); } EXPECT_EQ( tan( u ), v ); @@ -777,7 +781,7 @@ TYPED_TEST( VectorTest, sqrt ) for( int i = 0; i < size; i++ ) { u[ i ] = i; - v[ i ] = sqrt( u[ i ] ); + v[ i ] = TNL::sqrt( u[ i ] ); } EXPECT_EQ( sqrt( u ), v ); @@ -794,7 +798,7 @@ TYPED_TEST( VectorTest, cbrt ) for( int i = 0; i < size; i++ ) { u[ i ] = i; - v[ i ] = cbrt( u[ i ] ); + v[ i ] = TNL::cbrt( u[ i ] ); } EXPECT_EQ( cbrt( u ), v ); @@ -812,8 +816,8 @@ TYPED_TEST( VectorTest, pow ) for( int i = 0; i < size; i++ ) { u[ i ] = i - size / 2; - v[ i ] = pow( u[ i ], 2.0 ); - w[ i ] = pow( u[ i ], 3.0 ); + v[ i ] = TNL::pow( u[ i ], 2.0 ); + w[ i ] = TNL::pow( u[ i ], 3.0 ); } EXPECT_EQ( pow( u, 2.0 ), v ); @@ -831,7 +835,7 @@ TYPED_TEST( VectorTest, floor ) for( int i = 0; i < size; i++ ) { u[ i ] = i - size / 2; - v[ i ] = floor( u[ i ] ); + v[ i ] = TNL::floor( u[ i ] ); } EXPECT_EQ( floor( u ), v ); @@ -848,7 +852,7 @@ TYPED_TEST( VectorTest, ceil ) for( int i = 0; i < size; i++ ) { u[ i ] = i - size / 2; - v[ i ] = ceil( u[ i ] ); + v[ i ] = TNL::ceil( u[ i ] ); } EXPECT_EQ( ceil( u ), v ); @@ -865,7 +869,7 @@ TYPED_TEST( VectorTest, acos ) for( int i = 0; i < size; i++ ) { u[ i ] = ( double )( i - size / 2 ) / ( double ) size; - v[ i ] = acos( u[ i ] ); + v[ i ] = TNL::acos( u[ i ] ); 
} EXPECT_EQ( acos( u ), v ); @@ -882,7 +886,7 @@ TYPED_TEST( VectorTest, asin ) for( int i = 0; i < size; i++ ) { u[ i ] = ( double ) ( i - size / 2 ) / ( double ) size; - v[ i ] = asin( u[ i ] ); + v[ i ] = TNL::asin( u[ i ] ); } EXPECT_EQ( asin( u ), v ); @@ -899,7 +903,7 @@ TYPED_TEST( VectorTest, atan ) for( int i = 0; i < size; i++ ) { u[ i ] = i - size / 2; - v[ i ] = atan( u[ i ] ); + v[ i ] = TNL::atan( u[ i ] ); } EXPECT_EQ( atan( u ), v ); @@ -918,7 +922,7 @@ TYPED_TEST( VectorTest, cosh ) for( int i = 0; i < size; i++ ) { u[ i ] = i * h - ( RealType ) 1.0; - v[ i ] = cosh( u[ i ] ); + v[ i ] = TNL::cosh( u[ i ] ); } // EXPECT_EQ( cosh( u ), v ) does not work here for float, maybe because @@ -938,7 +942,7 @@ TYPED_TEST( VectorTest, tanh ) for( int i = 0; i < size; i++ ) { u[ i ] = i - size / 2; - v[ i ] = tanh( u[ i ] ); + v[ i ] = TNL::tanh( u[ i ] ); } EXPECT_EQ( tanh( u ), v ); @@ -955,7 +959,7 @@ TYPED_TEST( VectorTest, log ) for( int i = 0; i < size; i++ ) { u[ i ] = i + 1; - v[ i ] = log( u[ i ] ); + v[ i ] = TNL::log( u[ i ] ); } EXPECT_EQ( log( u ), v ); @@ -972,7 +976,7 @@ TYPED_TEST( VectorTest, log10 ) for( int i = 0; i < size; i++ ) { u[ i ] = i + 1; - v[ i ] = log10( u[ i ] ); + v[ i ] = TNL::log10( u[ i ] ); } // EXPECT_EQ( log10( u ), v ) does not work here for float, maybe because @@ -992,7 +996,7 @@ TYPED_TEST( VectorTest, log2 ) for( int i = 0; i < size; i++ ) { u[ i ] = i + 1; - v[ i ] = log2( u[ i ] ); + v[ i ] = TNL::log2( u[ i ] ); } EXPECT_EQ( log2( u ), v ); @@ -1009,7 +1013,7 @@ TYPED_TEST( VectorTest, exp ) for( int i = 0; i < size; i++ ) { u[ i ] = i - size / 2; - v[ i ] = exp( u[ i ] ); + v[ i ] = TNL::exp( u[ i ] ); } EXPECT_EQ( exp( u ), v ); @@ -1026,11 +1030,12 @@ TYPED_TEST( VectorTest, sign ) for( int i = 0; i < size; i++ ) { u[ i ] = i - size / 2; - v[ i ] = sign( u[ i ] ); + v[ i ] = TNL::sign( u[ i ] ); } EXPECT_EQ( sign( u ), v ); } +*/ // TODO: test prefix sum with custom begin and end parameters -- GitLab From 
a06d7b8ec5220b984d8a9d690c43161d58bc208d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Wed, 1 May 2019 16:23:28 +0200 Subject: [PATCH 22/93] Expression templates are working. --- .../Containers/Algorithms/VectorAssignment.h | 4 +- .../Expressions/ExpressionTemplates.h | 58 +- src/TNL/Containers/Vector.h | 6 + src/TNL/Containers/Vector.hpp | 12 + src/TNL/Containers/VectorView.h | 8 +- src/TNL/Containers/VectorView_impl.h | 24 +- src/UnitTests/Containers/ArrayViewTest.h | 11 +- src/UnitTests/Containers/CMakeLists.txt | 43 +- src/UnitTests/Containers/VectorTest-1.cpp | 11 + src/UnitTests/Containers/VectorTest-1.cu | 11 + src/UnitTests/Containers/VectorTest-1.h | 180 +++ src/UnitTests/Containers/VectorTest-2.cpp | 11 + src/UnitTests/Containers/VectorTest-2.cu | 11 + src/UnitTests/Containers/VectorTest-2.h | 165 +++ src/UnitTests/Containers/VectorTest-3.cpp | 11 + src/UnitTests/Containers/VectorTest-3.cu | 11 + src/UnitTests/Containers/VectorTest-3.h | 183 +++ .../{VectorTest.cu => VectorTest-4.cpp} | 8 +- .../{VectorTest.cpp => VectorTest-4.cu} | 8 +- src/UnitTests/Containers/VectorTest-4.h | 238 ++++ src/UnitTests/Containers/VectorTest-5.cpp | 11 + src/UnitTests/Containers/VectorTest-5.cu | 11 + src/UnitTests/Containers/VectorTest-5.h | 564 ++++++++ src/UnitTests/Containers/VectorTest.h | 1194 ----------------- src/UnitTests/Containers/VectorTestSetup.h | 143 ++ 25 files changed, 1716 insertions(+), 1221 deletions(-) create mode 100644 src/UnitTests/Containers/VectorTest-1.cpp create mode 100644 src/UnitTests/Containers/VectorTest-1.cu create mode 100644 src/UnitTests/Containers/VectorTest-1.h create mode 100644 src/UnitTests/Containers/VectorTest-2.cpp create mode 100644 src/UnitTests/Containers/VectorTest-2.cu create mode 100644 src/UnitTests/Containers/VectorTest-2.h create mode 100644 src/UnitTests/Containers/VectorTest-3.cpp create mode 100644 src/UnitTests/Containers/VectorTest-3.cu create mode 100644 
src/UnitTests/Containers/VectorTest-3.h rename src/UnitTests/Containers/{VectorTest.cu => VectorTest-4.cpp} (67%) rename src/UnitTests/Containers/{VectorTest.cpp => VectorTest-4.cu} (68%) create mode 100644 src/UnitTests/Containers/VectorTest-4.h create mode 100644 src/UnitTests/Containers/VectorTest-5.cpp create mode 100644 src/UnitTests/Containers/VectorTest-5.cu create mode 100644 src/UnitTests/Containers/VectorTest-5.h delete mode 100644 src/UnitTests/Containers/VectorTest.h create mode 100644 src/UnitTests/Containers/VectorTestSetup.h diff --git a/src/TNL/Containers/Algorithms/VectorAssignment.h b/src/TNL/Containers/Algorithms/VectorAssignment.h index bc30bd495..9a01fa1a8 100644 --- a/src/TNL/Containers/Algorithms/VectorAssignment.h +++ b/src/TNL/Containers/Algorithms/VectorAssignment.h @@ -63,7 +63,7 @@ struct VectorAssignment< Vector, T, true > v[ i ] = t[ i ]; }; - static void assign( Vector& v, const T t ) + static void assign( Vector& v, const T& t ) { TNL_ASSERT_EQ( v.getSize(), t.getSize(), "The sizes of the vectors must be equal." 
); using RealType = typename Vector::RealType; @@ -76,6 +76,7 @@ struct VectorAssignment< Vector, T, true > data[ i ] = t[ i ]; }; ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), ass ); + TNL_CHECK_CUDA_DEVICE; }; }; @@ -112,6 +113,7 @@ struct VectorAssignment< Vector, T, false > data[ i ] = t; }; ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), ass ); + TNL_CHECK_CUDA_DEVICE; } }; diff --git a/src/TNL/Containers/Expressions/ExpressionTemplates.h b/src/TNL/Containers/Expressions/ExpressionTemplates.h index b8376b71d..537a74e6b 100644 --- a/src/TNL/Containers/Expressions/ExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/ExpressionTemplates.h @@ -89,6 +89,11 @@ struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariab return BinaryExpressionTemplate( a, b ); } + RealType getElement( const int i ) const + { + return Operation< typename T1::RealType, typename T2::RealType >::evaluate( op1[ i ], op2[ i ] ); + } + __cuda_callable__ RealType operator[]( const int i ) const { @@ -124,6 +129,11 @@ struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVa return BinaryExpressionTemplate( a, b ); } + RealType getElement( const int i ) const + { + return Operation< typename T1::RealType, T2 >::evaluate( op1[ i ], op2 ); + } + __cuda_callable__ RealType operator[]( const int i ) const { @@ -160,6 +170,11 @@ struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVa return BinaryExpressionTemplate( a, b ); } + RealType getElement( const int i ) const + { + return Operation< T1, typename T2::RealType >::evaluate( op1, op2[ i ] ); + } + __cuda_callable__ RealType operator[]( const int i ) const { @@ -200,6 +215,11 @@ struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariab return BinaryExpressionTemplate( a, b ); } + RealType getElement( const int i ) const + { + return Operation< typename T1::RealType, typename T2::RealType >::evaluate( op1.getElement( 
i ), op2.getElement( i ) ); + } + __cuda_callable__ RealType operator[]( const int i ) const { @@ -236,6 +256,11 @@ struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVa return BinaryExpressionTemplate( a, b ); } + RealType getElement( const int i ) const + { + return Operation< typename T1::RealType, typename T2::RealType >::evaluate( op1.getElement( i ), op2 ); + } + __cuda_callable__ RealType operator[]( const int i ) const { @@ -272,6 +297,11 @@ struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVa return BinaryExpressionTemplate( a, b ); } + RealType getElement( const int i ) const + { + return Operation< typename T1::RealType, typename T2::RealType >::evaluate( op1, op2.getElement( i ) ); + } + __cuda_callable__ RealType operator[]( const int i ) const { @@ -315,6 +345,11 @@ struct UnaryExpressionTemplate< T1, Operation, Parameter, VectorVariable, true > return UnaryExpressionTemplate( a ); } + RealType getElement( const int i ) const + { + return Operation< typename T1::RealType >::evaluate( operand[ i ], parameter ); + } + __cuda_callable__ RealType operator[]( const int i ) const { @@ -355,6 +390,11 @@ struct UnaryExpressionTemplate< T1, Operation, void, VectorVariable, true > return UnaryExpressionTemplate( a ); } + RealType getElement( const int i ) const + { + return Operation< typename T1::RealType >::evaluate( operand[ i ] ); + } + __cuda_callable__ RealType operator[]( const int i ) const { @@ -395,6 +435,11 @@ struct UnaryExpressionTemplate< T1, Operation, Parameter, VectorVariable, false return UnaryExpressionTemplate( a ); } + RealType getElement( const int i ) const + { + return Operation< typename T1::RealType >::evaluate( operand.getElement( i ), parameter ); + } + __cuda_callable__ RealType operator[]( const int i ) const { @@ -436,6 +481,11 @@ struct UnaryExpressionTemplate< T1, Operation, void, VectorVariable, false > return UnaryExpressionTemplate( a ); } + RealType getElement( const int 
i ) const + { + return Operation< typename T1::RealType >::evaluate( operand.getElement( i ) ); + } + __cuda_callable__ RealType operator[]( const int i ) const { @@ -1832,8 +1882,8 @@ std::ostream& operator << ( std::ostream& str, const BinaryExpressionTemplate< T { str << "[ "; for( int i = 0; i < expression.getSize() - 1; i++ ) - str << expression[ i ] << ", "; - str << expression[ expression.getSize() - 1 ] << " ]"; + str << expression.getElement( i ) << ", "; + str << expression.getElement( expression.getSize() - 1 ) << " ]"; return str; } @@ -1844,8 +1894,8 @@ std::ostream& operator << ( std::ostream& str, const UnaryExpressionTemplate< T, { str << "[ "; for( int i = 0; i < expression.getSize() - 1; i++ ) - str << expression[ i ] << ", "; - str << expression[ expression.getSize() - 1 ] << " ]"; + str << expression.getElement( i ) << ", "; + str << expression.getElement( expression.getSize() - 1 ) << " ]"; return str; } } //namespace Expressions diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h index d3177235f..457a556e2 100644 --- a/src/TNL/Containers/Vector.h +++ b/src/TNL/Containers/Vector.h @@ -16,6 +16,9 @@ namespace TNL { namespace Containers { +template< typename Real, typename Device, typename Index > +class VectorView; + /** * \brief This class extends TNL::Array with algebraic operations. 
* @@ -181,6 +184,9 @@ public: template< typename Real_, typename Device_, typename Index_ > Vector& operator = ( const Vector< Real_, Device_, Index_ >& v ); + template< typename Real_, typename Device_, typename Index_ > + Vector& operator = ( const VectorView< Real_, Device_, Index_ >& v ); + template< typename VectorExpression > Vector& operator = ( const VectorExpression& expression ); diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index 97b5f17fe..dc6fce8b2 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -12,6 +12,7 @@ #include #include +#include namespace TNL { namespace Containers { @@ -263,6 +264,17 @@ Vector< Real, Device, Index >::operator = ( const Vector< Real_, Device_, Index_ return *this; } +template< typename Real, + typename Device, + typename Index > + template< typename Real_, typename Device_, typename Index_ > +Vector< Real, Device, Index >& +Vector< Real, Device, Index >::operator = ( const VectorView< Real_, Device_, Index_ >& vector ) +{ + Array< Real, Device, Index >::operator=( vector ); + return *this; +} + template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index 6610f93f1..f941d9958 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -98,8 +98,14 @@ public: RealType value, Scalar thisElementMultiplicator ); + template< typename Real_, typename Device_, typename Index_ > + VectorView& operator=( const VectorView< Real_, Device_, Index_ >& v ); + + template< typename Real_, typename Device_, typename Index_ > + VectorView& operator=( const Vector< Real_, Device_, Index_ >& v ); + template< typename VectorExpression > - VectorView& operator = ( const VectorExpression& expression ); + VectorView& operator=( const VectorExpression& expression ); template< typename Vector > VectorView& operator-=( const Vector& vector ); diff --git 
a/src/TNL/Containers/VectorView_impl.h b/src/TNL/Containers/VectorView_impl.h index 3efcbe148..d8b927d51 100644 --- a/src/TNL/Containers/VectorView_impl.h +++ b/src/TNL/Containers/VectorView_impl.h @@ -94,12 +94,34 @@ addElement( IndexType i, RealType value, Scalar thisElementMultiplicator ) Algorithms::VectorOperations< Device >::addElement( *this, i, value, thisElementMultiplicator ); } +template< typename Real, + typename Device, + typename Index > + template< typename Real_, typename Device_, typename Index_ > +VectorView< Real, Device, Index >& +VectorView< Real, Device, Index >::operator=( const VectorView< Real_, Device_, Index_ >& v ) +{ + ArrayView< Real, Device, Index >::operator=( v ); + return *this; +} + +template< typename Real, + typename Device, + typename Index > + template< typename Real_, typename Device_, typename Index_ > +VectorView< Real, Device, Index >& +VectorView< Real, Device, Index >::operator=( const Vector< Real_, Device_, Index_ >& v ) +{ + ArrayView< Real, Device, Index >::operator=( v ); + return *this; +} + template< typename Real, typename Device, typename Index > template< typename VectorExpression > VectorView< Real, Device, Index >& -VectorView< Real, Device, Index >::operator = ( const VectorExpression& expression ) +VectorView< Real, Device, Index >::operator=( const VectorExpression& expression ) { Algorithms::VectorAssignment< VectorView< Real, Device, Index >, VectorExpression >::assign( *this, expression ); return *this; diff --git a/src/UnitTests/Containers/ArrayViewTest.h b/src/UnitTests/Containers/ArrayViewTest.h index 6a128db07..1d422f20f 100644 --- a/src/UnitTests/Containers/ArrayViewTest.h +++ b/src/UnitTests/Containers/ArrayViewTest.h @@ -472,21 +472,24 @@ TYPED_TEST( ArrayViewTest, assignmentOperator ) EXPECT_EQ( v.getData(), b.getData() ); // assignment from host to device - v.setValue( 0 ); + //v.setValue( 0 ); + v = 0; v = u_host; EXPECT_EQ( u, v ); EXPECT_EQ( v.getData(), b.getData() ); // assignment from 
device to host - u_host.setValue( 0 ); + /*u_host.setValue( 0 ); u_host = u; - EXPECT_EQ( u_host, u ); + + EXPECT_TRUE( u_host == u ); + //EXPECT_EQ( u_host, u ); TODO: this is not accepted by nvcc 10, because nvcc is cockot EXPECT_EQ( u_host.getData(), a_host.getData() ); // assignment of const view to non-const view v.setValue( 0 ); ConstViewType c( u ); - v = c; + v = c;*/ } // test works only for arithmetic types diff --git a/src/UnitTests/Containers/CMakeLists.txt b/src/UnitTests/Containers/CMakeLists.txt index 99e4d677b..a5b81a698 100644 --- a/src/UnitTests/Containers/CMakeLists.txt +++ b/src/UnitTests/Containers/CMakeLists.txt @@ -22,9 +22,21 @@ TARGET_LINK_LIBRARIES( ArrayViewTest ${GTEST_BOTH_LIBRARIES} ) # NOTE: Vector = Array + VectorOperations, VectorView = ArrayView + VectorOperations, # so we test Vector, VectorView and VectorOperations at the same time -ADD_EXECUTABLE( VectorTest VectorTest.cpp ) -TARGET_COMPILE_OPTIONS( VectorTest PRIVATE ${CXX_TESTS_FLAGS} ) -TARGET_LINK_LIBRARIES( VectorTest ${GTEST_BOTH_LIBRARIES} ) +ADD_EXECUTABLE( VectorTest-1 VectorTest-1.cpp ) +ADD_EXECUTABLE( VectorTest-2 VectorTest-2.cpp ) +ADD_EXECUTABLE( VectorTest-3 VectorTest-3.cpp ) +ADD_EXECUTABLE( VectorTest-4 VectorTest-4.cpp ) +ADD_EXECUTABLE( VectorTest-5 VectorTest-5.cpp ) +TARGET_COMPILE_OPTIONS( VectorTest-1 PRIVATE ${CXX_TESTS_FLAGS} ) +TARGET_COMPILE_OPTIONS( VectorTest-2 PRIVATE ${CXX_TESTS_FLAGS} ) +TARGET_COMPILE_OPTIONS( VectorTest-3 PRIVATE ${CXX_TESTS_FLAGS} ) +TARGET_COMPILE_OPTIONS( VectorTest-4 PRIVATE ${CXX_TESTS_FLAGS} ) +TARGET_COMPILE_OPTIONS( VectorTest-5 PRIVATE ${CXX_TESTS_FLAGS} ) +TARGET_LINK_LIBRARIES( VectorTest-1 ${GTEST_BOTH_LIBRARIES} ) +TARGET_LINK_LIBRARIES( VectorTest-2 ${GTEST_BOTH_LIBRARIES} ) +TARGET_LINK_LIBRARIES( VectorTest-3 ${GTEST_BOTH_LIBRARIES} ) +TARGET_LINK_LIBRARIES( VectorTest-4 ${GTEST_BOTH_LIBRARIES} ) +TARGET_LINK_LIBRARIES( VectorTest-5 ${GTEST_BOTH_LIBRARIES} ) IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( ArrayTestCuda 
ArrayTest.cu @@ -35,9 +47,16 @@ IF( BUILD_CUDA ) OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( ArrayViewTestCuda ${GTEST_BOTH_LIBRARIES} ) - CUDA_ADD_EXECUTABLE( VectorTestCuda VectorTest.cu - OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( VectorTestCuda ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( VectorTestCuda-1 VectorTest-1.cu OPTIONS ${CXX_TESTS_FLAGS} ) + CUDA_ADD_EXECUTABLE( VectorTestCuda-2 VectorTest-2.cu OPTIONS ${CXX_TESTS_FLAGS} ) + CUDA_ADD_EXECUTABLE( VectorTestCuda-3 VectorTest-3.cu OPTIONS ${CXX_TESTS_FLAGS} ) + CUDA_ADD_EXECUTABLE( VectorTestCuda-4 VectorTest-4.cu OPTIONS ${CXX_TESTS_FLAGS} ) + CUDA_ADD_EXECUTABLE( VectorTestCuda-5 VectorTest-5.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( VectorTestCuda-1 ${GTEST_BOTH_LIBRARIES} ) + TARGET_LINK_LIBRARIES( VectorTestCuda-2 ${GTEST_BOTH_LIBRARIES} ) + TARGET_LINK_LIBRARIES( VectorTestCuda-3 ${GTEST_BOTH_LIBRARIES} ) + TARGET_LINK_LIBRARIES( VectorTestCuda-4 ${GTEST_BOTH_LIBRARIES} ) + TARGET_LINK_LIBRARIES( VectorTestCuda-5 ${GTEST_BOTH_LIBRARIES} ) ENDIF( BUILD_CUDA ) IF( BUILD_CUDA ) @@ -63,11 +82,19 @@ ADD_TEST( ListTest ${EXECUTABLE_OUTPUT_PATH}/ListTest${CMAKE_EXECUTABLE_SUFFIX} ADD_TEST( ArrayOperationsTest ${EXECUTABLE_OUTPUT_PATH}/ArrayOperationsTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( ArrayTest ${EXECUTABLE_OUTPUT_PATH}/ArrayTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( ArrayViewTest ${EXECUTABLE_OUTPUT_PATH}/ArrayViewTest${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( VectorTest ${EXECUTABLE_OUTPUT_PATH}/VectorTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( VectorTest-1 ${EXECUTABLE_OUTPUT_PATH}/VectorTest-1${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( VectorTest-2 ${EXECUTABLE_OUTPUT_PATH}/VectorTest-2${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( VectorTest-3 ${EXECUTABLE_OUTPUT_PATH}/VectorTest-3${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( VectorTest-4 ${EXECUTABLE_OUTPUT_PATH}/VectorTest-4${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( VectorTest-5 
${EXECUTABLE_OUTPUT_PATH}/VectorTest-5${CMAKE_EXECUTABLE_SUFFIX} ) IF( BUILD_CUDA ) ADD_TEST( ArrayTestCuda ${EXECUTABLE_OUTPUT_PATH}/ArrayTestCuda${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( ArrayViewTestCuda ${EXECUTABLE_OUTPUT_PATH}/ArrayViewTestCuda${CMAKE_EXECUTABLE_SUFFIX} ) - ADD_TEST( VectorTestCuda ${EXECUTABLE_OUTPUT_PATH}/VectorTestCuda${CMAKE_EXECUTABLE_SUFFIX} ) + ADD_TEST( VectorTestCuda-1 ${EXECUTABLE_OUTPUT_PATH}/VectorTestCuda-1${CMAKE_EXECUTABLE_SUFFIX} ) + ADD_TEST( VectorTestCuda-2 ${EXECUTABLE_OUTPUT_PATH}/VectorTestCuda-2${CMAKE_EXECUTABLE_SUFFIX} ) + ADD_TEST( VectorTestCuda-3 ${EXECUTABLE_OUTPUT_PATH}/VectorTestCuda-3${CMAKE_EXECUTABLE_SUFFIX} ) + ADD_TEST( VectorTestCuda-4 ${EXECUTABLE_OUTPUT_PATH}/VectorTestCuda-4${CMAKE_EXECUTABLE_SUFFIX} ) + ADD_TEST( VectorTestCuda-5 ${EXECUTABLE_OUTPUT_PATH}/VectorTestCuda-5${CMAKE_EXECUTABLE_SUFFIX} ) ENDIF() ADD_TEST( MultireductionTest ${EXECUTABLE_OUTPUT_PATH}/MultireductionTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( StaticArrayTest ${EXECUTABLE_OUTPUT_PATH}/StaticArrayTest${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Containers/VectorTest-1.cpp b/src/UnitTests/Containers/VectorTest-1.cpp new file mode 100644 index 000000000..b84e78f02 --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-1.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + VectorTest-1.cpp - description + ------------------- + begin : Apr 30, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "VectorTest-1.h" diff --git a/src/UnitTests/Containers/VectorTest-1.cu b/src/UnitTests/Containers/VectorTest-1.cu new file mode 100644 index 000000000..e3a4a6c14 --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-1.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + 
VectorTest-1.cu - description + ------------------- + begin : Apr 30, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "VectorTest-1.h" diff --git a/src/UnitTests/Containers/VectorTest-1.h b/src/UnitTests/Containers/VectorTest-1.h new file mode 100644 index 000000000..292a403b4 --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-1.h @@ -0,0 +1,180 @@ +/*************************************************************************** + VectorTest.h - description + ------------------- + begin : Oct 25, 2010 + copyright : (C) 2010 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +// NOTE: Vector = Array + VectorOperations, so we test Vector and VectorOperations at the same time + +#pragma once + +#ifdef HAVE_GTEST +#include + +#include +#include +#include +#include "VectorTestSetup.h" + +#include "gtest/gtest.h" + +using namespace TNL; +using namespace TNL::Containers; +using namespace TNL::Containers::Algorithms; +using namespace TNL::Arithmetics; + +// should be small enough to have fast tests, but larger than minGPUReductionDataSize +// and large enough to require multiple CUDA blocks for reduction +constexpr int VECTOR_TEST_SIZE = 5000; + +TYPED_TEST( VectorTest, constructors ) +{ + using VectorType = typename TestFixture::VectorType; + const int size = VECTOR_TEST_SIZE; + + // TODO: Does not work yet. 
+ /*VectorType empty_u; + VectorType empty_v( empty_u ); + EXPECT_EQ( empty_u.getSize(), 0 ); + EXPECT_EQ( empty_v.getSize(), 0 );*/ + + VectorType u( size ); + EXPECT_EQ( u.getSize(), size ); + + VectorType v( 10 ); + EXPECT_EQ( v.getSize(), 10 ); + + if( std::is_same< typename VectorType::DeviceType, Devices::Host >::value ) { + typename VectorType::ValueType data[ 10 ]; + VectorType w( data, 10 ); + EXPECT_EQ( w.getData(), data ); + + VectorType z1( w ); + //EXPECT_EQ( z1.getData(), data ); + EXPECT_EQ( z1.getSize(), 10 ); + + VectorType z2( w, 1 ); + EXPECT_EQ( z2.getData(), data + 1 ); + EXPECT_EQ( z2.getSize(), 9 ); + + VectorType z3( w, 2, 3 ); + EXPECT_EQ( z3.getData(), data + 2 ); + EXPECT_EQ( z3.getSize(), 3 ); + } + + v = 1; + VectorType w( v ); + EXPECT_EQ( w.getSize(), v.getSize() ); + for( int i = 0; i < 10; i++ ) + EXPECT_EQ( v.getElement( i ), w.getElement( i ) ); + v.reset(); + EXPECT_EQ( w.getSize(), 10 ); + + VectorType a1 { 1, 2, 3 }; + EXPECT_EQ( a1.getElement( 0 ), 1 ); + EXPECT_EQ( a1.getElement( 1 ), 2 ); + EXPECT_EQ( a1.getElement( 2 ), 3 ); + + std::list< int > l = { 4, 5, 6 }; + VectorType a2( l ); + EXPECT_EQ( a2.getElement( 0 ), 4 ); + EXPECT_EQ( a2.getElement( 1 ), 5 ); + EXPECT_EQ( a2.getElement( 2 ), 6 ); + + std::vector< int > q = { 7, 8, 9 }; + + VectorType a3( q ); + EXPECT_EQ( a3.getElement( 0 ), 7 ); + EXPECT_EQ( a3.getElement( 1 ), 8 ); + EXPECT_EQ( a3.getElement( 2 ), 9 ); +} + +TYPED_TEST( VectorTest, max ) +{ + using VectorType = typename TestFixture::VectorType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType v; + v.setSize( size ); + ViewType v_view( v ); + setLinearSequence( v ); + + EXPECT_EQ( v.max(), size - 1 ); + EXPECT_EQ( v_view.max(), size - 1 ); + EXPECT_EQ( VectorOperations::getVectorMax( v ), size - 1 ); +} + +TYPED_TEST( VectorTest, min ) +{ + using VectorType = typename 
TestFixture::VectorType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType v; + v.setSize( size ); + ViewType v_view( v ); + setLinearSequence( v ); + + EXPECT_EQ( v.min(), 0 ); + EXPECT_EQ( v_view.min(), 0 ); + EXPECT_EQ( VectorOperations::getVectorMin( v ), 0 ); +} + +TYPED_TEST( VectorTest, absMax ) +{ + using VectorType = typename TestFixture::VectorType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType v; + v.setSize( size ); + ViewType v_view( v ); + setNegativeLinearSequence( v ); + + EXPECT_EQ( v.absMax(), size - 1 ); + EXPECT_EQ( v_view.absMax(), size - 1 ); + EXPECT_EQ( VectorOperations::getVectorAbsMax( v ), size - 1 ); +} + +TYPED_TEST( VectorTest, absMin ) +{ + using VectorType = typename TestFixture::VectorType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType v; + v.setSize( size ); + ViewType v_view( v ); + setNegativeLinearSequence( v ); + + EXPECT_EQ( v.absMin(), 0 ); + EXPECT_EQ( v_view.absMin(), 0 ); + EXPECT_EQ( VectorOperations::getVectorAbsMin( v ), 0 ); +} + + + +#endif // HAVE_GTEST + + +#include "../GtestMissingError.h" +int main( int argc, char* argv[] ) +{ + //Test(); + //return 0; +#ifdef HAVE_GTEST + ::testing::InitGoogleTest( &argc, argv ); + return RUN_ALL_TESTS(); +#else + throw GtestMissingError(); +#endif +} diff --git a/src/UnitTests/Containers/VectorTest-2.cpp b/src/UnitTests/Containers/VectorTest-2.cpp new file mode 100644 index 000000000..02a112889 --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-2.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + VectorTest-2.cpp - description + ------------------- + begin : Apr 
30, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "VectorTest-2.h" diff --git a/src/UnitTests/Containers/VectorTest-2.cu b/src/UnitTests/Containers/VectorTest-2.cu new file mode 100644 index 000000000..d7f43ab6f --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-2.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + VectorTest-2.cu - description + ------------------- + begin : Apr 30, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "VectorTest-2.h" diff --git a/src/UnitTests/Containers/VectorTest-2.h b/src/UnitTests/Containers/VectorTest-2.h new file mode 100644 index 000000000..6c32fbd7e --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-2.h @@ -0,0 +1,165 @@ +/*************************************************************************** + VectorTest.h - description + ------------------- + begin : Oct 25, 2010 + copyright : (C) 2010 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +// NOTE: Vector = Array + VectorOperations, so we test Vector and VectorOperations at the same time + +#pragma once + +#ifdef HAVE_GTEST +#include + +#include +#include +#include +#include "VectorTestSetup.h" + +#include "gtest/gtest.h" + +using namespace TNL; +using namespace TNL::Containers; +using namespace TNL::Containers::Algorithms; +using namespace TNL::Arithmetics; + +// should be small enough to have fast tests, but larger than minGPUReductionDataSize +// and large enough to require multiple CUDA blocks for reduction +constexpr int 
VECTOR_TEST_SIZE = 5000; + +TYPED_TEST( VectorTest, lpNorm ) +{ + using VectorType = typename TestFixture::VectorType; + using RealType = typename VectorType::RealType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + const RealType epsilon = 64 * std::numeric_limits< RealType >::epsilon(); + + VectorType v; + v.setSize( size ); + ViewType v_view( v ); + setConstantSequence( v, 1 ); + + const RealType expectedL1norm = size; + const RealType expectedL2norm = std::sqrt( size ); + const RealType expectedL3norm = std::cbrt( size ); + EXPECT_EQ( v.lpNorm( 1.0 ), expectedL1norm ); + EXPECT_EQ( v.lpNorm( 2.0 ), expectedL2norm ); + EXPECT_NEAR( v.lpNorm( 3.0 ), expectedL3norm, epsilon ); + EXPECT_EQ( v_view.lpNorm( 1.0 ), expectedL1norm ); + EXPECT_EQ( v_view.lpNorm( 2.0 ), expectedL2norm ); + EXPECT_NEAR( v_view.lpNorm( 3.0 ), expectedL3norm, epsilon ); + EXPECT_EQ( VectorOperations::getVectorLpNorm( v, 1.0 ), expectedL1norm ); + EXPECT_EQ( VectorOperations::getVectorLpNorm( v, 2.0 ), expectedL2norm ); + EXPECT_NEAR( VectorOperations::getVectorLpNorm( v, 3.0 ), expectedL3norm, epsilon ); +} + +TYPED_TEST( VectorTest, sum ) +{ + using VectorType = typename TestFixture::VectorType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + // this test expect an even size + const int size = VECTOR_TEST_SIZE % 2 ? 
VECTOR_TEST_SIZE - 1 : VECTOR_TEST_SIZE; + + VectorType v; + v.setSize( size ); + ViewType v_view( v ); + + setConstantSequence( v, 1 ); + EXPECT_EQ( v.sum(), size ); + EXPECT_EQ( v_view.sum(), size ); + EXPECT_EQ( VectorOperations::getVectorSum( v ), size ); + + setLinearSequence( v ); + EXPECT_EQ( v.sum(), 0.5 * size * ( size - 1 ) ); + EXPECT_EQ( v_view.sum(), 0.5 * size * ( size - 1 ) ); + EXPECT_EQ( VectorOperations::getVectorSum( v ), 0.5 * size * ( size - 1 ) ); + + setNegativeLinearSequence( v ); + EXPECT_EQ( v.sum(), - 0.5 * size * ( size - 1 ) ); + EXPECT_EQ( v_view.sum(), - 0.5 * size * ( size - 1 ) ); + EXPECT_EQ( VectorOperations::getVectorSum( v ), - 0.5 * size * ( size - 1 ) ); + + setOscilatingSequence( v, 1.0 ); + EXPECT_EQ( v.sum(), 0 ); + EXPECT_EQ( v_view.sum(), 0 ); + EXPECT_EQ( VectorOperations::getVectorSum( v ), 0 ); +} + +TYPED_TEST( VectorTest, differenceMax ) +{ + using VectorType = typename TestFixture::VectorType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType u( size ), v( size ); + ViewType u_view( u ), v_view( v ); + setLinearSequence( u ); + setConstantSequence( v, size / 2 ); + + EXPECT_EQ( u.differenceMax( v ), size - 1 - size / 2 ); + EXPECT_EQ( u_view.differenceMax( v_view ), size - 1 - size / 2 ); + EXPECT_EQ( VectorOperations::getVectorDifferenceMax( u, v ), size - 1 - size / 2 ); +} + +TYPED_TEST( VectorTest, differenceMin ) +{ + using VectorType = typename TestFixture::VectorType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType u( size ), v( size ); + ViewType u_view( u ), v_view( v ); + setLinearSequence( u ); + setConstantSequence( v, size / 2 ); + + EXPECT_EQ( u.differenceMin( v ), - size / 2 ); + EXPECT_EQ( u_view.differenceMin( v_view ), - size / 2 ); + EXPECT_EQ( 
VectorOperations::getVectorDifferenceMin( u, v ), - size / 2 ); + EXPECT_EQ( v.differenceMin( u ), size / 2 - size + 1 ); + EXPECT_EQ( v_view.differenceMin( u_view ), size / 2 - size + 1 ); + EXPECT_EQ( VectorOperations::getVectorDifferenceMin( v, u ), size / 2 - size + 1 ); +} + +TYPED_TEST( VectorTest, differenceAbsMax ) +{ + using VectorType = typename TestFixture::VectorType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + // this test expects an odd size + const int size = VECTOR_TEST_SIZE % 2 ? VECTOR_TEST_SIZE : VECTOR_TEST_SIZE - 1; + + VectorType u( size ), v( size ); + ViewType u_view( u ), v_view( v ); + setNegativeLinearSequence( u ); + setConstantSequence( v, - size / 2 ); + + EXPECT_EQ( u.differenceAbsMax( v ), size - 1 - size / 2 ); + EXPECT_EQ( u_view.differenceAbsMax( v_view ), size - 1 - size / 2 ); + EXPECT_EQ( VectorOperations::getVectorDifferenceAbsMax( u, v ), size - 1 - size / 2 ); +} + + +#endif // HAVE_GTEST + + +#include "../GtestMissingError.h" +int main( int argc, char* argv[] ) +{ + //Test(); + //return 0; +#ifdef HAVE_GTEST + ::testing::InitGoogleTest( &argc, argv ); + return RUN_ALL_TESTS(); +#else + throw GtestMissingError(); +#endif +} diff --git a/src/UnitTests/Containers/VectorTest-3.cpp b/src/UnitTests/Containers/VectorTest-3.cpp new file mode 100644 index 000000000..ccc29314d --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-3.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + VectorTest-3.cpp - description + ------------------- + begin : Apr 30, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "VectorTest-3.h" diff --git a/src/UnitTests/Containers/VectorTest-3.cu b/src/UnitTests/Containers/VectorTest-3.cu new file mode 100644 
index 000000000..b922dbced --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-3.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + VectorTest-3.cu - description + ------------------- + begin : Apr 30, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "VectorTest-3.h" diff --git a/src/UnitTests/Containers/VectorTest-3.h b/src/UnitTests/Containers/VectorTest-3.h new file mode 100644 index 000000000..53c6e5ef7 --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-3.h @@ -0,0 +1,183 @@ +/*************************************************************************** + VectorTest.h - description + ------------------- + begin : Oct 25, 2010 + copyright : (C) 2010 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +// NOTE: Vector = Array + VectorOperations, so we test Vector and VectorOperations at the same time + +#pragma once + +#ifdef HAVE_GTEST +#include + +#include +#include +#include +#include "VectorTestSetup.h" + +#include "gtest/gtest.h" + +using namespace TNL; +using namespace TNL::Containers; +using namespace TNL::Containers::Algorithms; +using namespace TNL::Arithmetics; + +// should be small enough to have fast tests, but larger than minGPUReductionDataSize +// and large enough to require multiple CUDA blocks for reduction +constexpr int VECTOR_TEST_SIZE = 5000; + +TYPED_TEST( VectorTest, differenceAbsMin ) +{ + using VectorType = typename TestFixture::VectorType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType u( size ), v( size ); + ViewType u_view( u ), v_view( v ); + 
setNegativeLinearSequence( u ); + setConstantSequence( v, - size / 2 ); + + EXPECT_EQ( u.differenceAbsMin( v ), 0 ); + EXPECT_EQ( u_view.differenceAbsMin( v_view ), 0 ); + EXPECT_EQ( VectorOperations::getVectorDifferenceAbsMin( u, v ), 0 ); + EXPECT_EQ( v.differenceAbsMin( u ), 0 ); + EXPECT_EQ( v_view.differenceAbsMin( u_view ), 0 ); + EXPECT_EQ( VectorOperations::getVectorDifferenceAbsMin( v, u ), 0 ); +} + +TYPED_TEST( VectorTest, differenceLpNorm ) +{ + using VectorType = typename TestFixture::VectorType; + using RealType = typename VectorType::RealType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + const RealType epsilon = 64 * std::numeric_limits< RealType >::epsilon(); + + VectorType u( size ), v( size ); + ViewType u_view( u ), v_view( v ); + u.setValue( 3.0 ); + v.setValue( 1.0 ); + + const RealType expectedL1norm = 2.0 * size; + const RealType expectedL2norm = std::sqrt( 4.0 * size ); + const RealType expectedL3norm = std::cbrt( 8.0 * size ); + EXPECT_EQ( u.differenceLpNorm( v, 1.0 ), expectedL1norm ); + EXPECT_EQ( u.differenceLpNorm( v, 2.0 ), expectedL2norm ); + EXPECT_NEAR( u.differenceLpNorm( v, 3.0 ), expectedL3norm, epsilon ); + EXPECT_EQ( u_view.differenceLpNorm( v_view, 1.0 ), expectedL1norm ); + EXPECT_EQ( u_view.differenceLpNorm( v_view, 2.0 ), expectedL2norm ); + EXPECT_NEAR( u_view.differenceLpNorm( v_view, 3.0 ), expectedL3norm, epsilon ); + EXPECT_EQ( VectorOperations::getVectorDifferenceLpNorm( u, v, 1.0 ), expectedL1norm ); + EXPECT_EQ( VectorOperations::getVectorDifferenceLpNorm( u, v, 2.0 ), expectedL2norm ); + EXPECT_NEAR( VectorOperations::getVectorDifferenceLpNorm( u, v, 3.0 ), expectedL3norm, epsilon ); +} + +TYPED_TEST( VectorTest, differenceSum ) +{ + using VectorType = typename TestFixture::VectorType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + 
// this test expect an even size + const int size = VECTOR_TEST_SIZE % 2 ? VECTOR_TEST_SIZE - 1 : VECTOR_TEST_SIZE; + + VectorType u( size ), v( size ); + ViewType u_view( u ), v_view( v ); + v.setValue( 1.0 ); + + setConstantSequence( u, 2 ); + EXPECT_EQ( u.differenceSum( v ), size ); + EXPECT_EQ( u_view.differenceSum( v_view ), size ); + EXPECT_EQ( VectorOperations::getVectorDifferenceSum( u, v ), size ); + + setLinearSequence( u ); + EXPECT_EQ( u.differenceSum( v ), 0.5 * size * ( size - 1 ) - size ); + EXPECT_EQ( u_view.differenceSum( v_view ), 0.5 * size * ( size - 1 ) - size ); + EXPECT_EQ( VectorOperations::getVectorDifferenceSum( u, v ), 0.5 * size * ( size - 1 ) - size ); + + setNegativeLinearSequence( u ); + EXPECT_EQ( u.differenceSum( v ), - 0.5 * size * ( size - 1 ) - size ); + EXPECT_EQ( u_view.differenceSum( v_view ), - 0.5 * size * ( size - 1 ) - size ); + EXPECT_EQ( VectorOperations::getVectorDifferenceSum( u, v ), - 0.5 * size * ( size - 1 ) - size ); + + setOscilatingSequence( u, 1.0 ); + EXPECT_EQ( u.differenceSum( v ), - size ); + EXPECT_EQ( u_view.differenceSum( v_view ), - size ); + EXPECT_EQ( VectorOperations::getVectorDifferenceSum( u, v ), - size ); +} + +TYPED_TEST( VectorTest, scalarMultiplication ) +{ + using VectorType = typename TestFixture::VectorType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType u( size ); + ViewType u_view( u ); + + typename VectorType::HostType expected; + expected.setSize( size ); + for( int i = 0; i < size; i++ ) + expected[ i ] = 2.0 * i; + + setLinearSequence( u ); + VectorOperations::vectorScalarMultiplication( u, 2.0 ); + EXPECT_EQ( u, expected ); + + setLinearSequence( u ); + u.scalarMultiplication( 2.0 ); + EXPECT_EQ( u, expected ); + + setLinearSequence( u ); + u_view.scalarMultiplication( 2.0 ); + EXPECT_EQ( u, expected ); + + setLinearSequence( u ); + u *= 2.0; + EXPECT_EQ( u, 
expected ); + + setLinearSequence( u ); + u_view *= 2.0; + EXPECT_EQ( u, expected ); +} + +TYPED_TEST( VectorTest, scalarProduct ) +{ + using VectorType = typename TestFixture::VectorType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + // this test expects an odd size + const int size = VECTOR_TEST_SIZE % 2 ? VECTOR_TEST_SIZE : VECTOR_TEST_SIZE - 1; + + VectorType u( size ), v( size ); + ViewType u_view( u ), v_view( v ); + setOscilatingSequence( u, 1.0 ); + setConstantSequence( v, 1 ); + + EXPECT_EQ( u.scalarProduct( v ), 1.0 ); + EXPECT_EQ( u_view.scalarProduct( v_view ), 1.0 ); + EXPECT_EQ( VectorOperations::getScalarProduct( u, v ), 1.0 ); +} + +#endif // HAVE_GTEST + + +#include "../GtestMissingError.h" +int main( int argc, char* argv[] ) +{ + //Test(); + //return 0; +#ifdef HAVE_GTEST + ::testing::InitGoogleTest( &argc, argv ); + return RUN_ALL_TESTS(); +#else + throw GtestMissingError(); +#endif +} diff --git a/src/UnitTests/Containers/VectorTest.cu b/src/UnitTests/Containers/VectorTest-4.cpp similarity index 67% rename from src/UnitTests/Containers/VectorTest.cu rename to src/UnitTests/Containers/VectorTest-4.cpp index f173d4a5e..9bfb4a9f9 100644 --- a/src/UnitTests/Containers/VectorTest.cu +++ b/src/UnitTests/Containers/VectorTest-4.cpp @@ -1,11 +1,11 @@ /*************************************************************************** - VectorTest.cu - description + VectorTest-4.cpp - description ------------------- - begin : Jul 20, 2013 - copyright : (C) 2013 by Tomas Oberhuber + begin : Apr 30, 2019 + copyright : (C) 2019 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ -#include "VectorTest.h" +#include "VectorTest-4.h" diff --git a/src/UnitTests/Containers/VectorTest.cpp b/src/UnitTests/Containers/VectorTest-4.cu similarity index 68% rename from 
src/UnitTests/Containers/VectorTest.cpp rename to src/UnitTests/Containers/VectorTest-4.cu index 49b580561..0a918fc5a 100644 --- a/src/UnitTests/Containers/VectorTest.cpp +++ b/src/UnitTests/Containers/VectorTest-4.cu @@ -1,11 +1,11 @@ /*************************************************************************** - VectorTest.cpp - description + VectorTest-4.cu - description ------------------- - begin : Jul 20, 2013 - copyright : (C) 2013 by Tomas Oberhuber + begin : Apr 30, 2019 + copyright : (C) 2019 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ -#include "VectorTest.h" +#include "VectorTest-4.h" diff --git a/src/UnitTests/Containers/VectorTest-4.h b/src/UnitTests/Containers/VectorTest-4.h new file mode 100644 index 000000000..e683d1b2b --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-4.h @@ -0,0 +1,238 @@ +/*************************************************************************** + VectorTest.h - description + ------------------- + begin : Oct 25, 2010 + copyright : (C) 2010 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +// NOTE: Vector = Array + VectorOperations, so we test Vector and VectorOperations at the same time + +#pragma once + +#ifdef HAVE_GTEST +#include + +#include +#include +#include +#include "VectorTestSetup.h" + +#include "gtest/gtest.h" + +using namespace TNL; +using namespace TNL::Containers; +using namespace TNL::Containers::Algorithms; +using namespace TNL::Arithmetics; + +// should be small enough to have fast tests, but larger than minGPUReductionDataSize +// and large enough to require multiple CUDA blocks for reduction +constexpr int VECTOR_TEST_SIZE = 5000; + +TYPED_TEST( VectorTest, addVector ) +{ + using VectorType = typename TestFixture::VectorType; + using 
VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType x, y; + x.setSize( size ); + y.setSize( size ); + ViewType x_view( x ), y_view( y ); + + typename VectorType::HostType expected1, expected2; + expected1.setSize( size ); + expected2.setSize( size ); + for( int i = 0; i < size; i++ ) { + expected1[ i ] = 2.0 + 3.0 * i; + expected2[ i ] = 1.0 + 3.0 * i; + } + + setConstantSequence( x, 1 ); + setLinearSequence( y ); + VectorOperations::addVector( x, y, 3.0, 2.0 ); + EXPECT_EQ( x, expected1 ); + + setConstantSequence( x, 1 ); + setLinearSequence( y ); + x.addVector( y, 3.0, 1.0 ); + EXPECT_EQ( x, expected2 ); + + setConstantSequence( x, 1 ); + setLinearSequence( y ); + x_view.addVector( y_view, 3.0, 1.0 ); + EXPECT_EQ( x, expected2 ); + + // multiplication by floating-point scalars which produces integer values + setConstantSequence( x, 2 ); + setConstantSequence( y, 4 ); + x.addVector( y, 2.5, -1.5 ); + EXPECT_EQ( x.min(), 7 ); + EXPECT_EQ( x.max(), 7 ); +} + +TYPED_TEST( VectorTest, addVectors ) +{ + using VectorType = typename TestFixture::VectorType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType x, y, z; + x.setSize( size ); + y.setSize( size ); + z.setSize( size ); + ViewType x_view( x ), y_view( y ), z_view( z ); + + typename VectorType::HostType expected1, expected2; + expected1.setSize( size ); + expected2.setSize( size ); + for( int i = 0; i < size; i++ ) { + expected1[ i ] = 1.0 + 3.0 * i + 2.0; + expected2[ i ] = 2.0 + 3.0 * i + 2.0; + } + + setConstantSequence( x, 1 ); + setLinearSequence( y ); + setConstantSequence( z, 2 ); + VectorOperations::addVectors( x, y, 3.0, z, 1.0, 1.0 ); + EXPECT_EQ( x, expected1 ); + + setConstantSequence( x, 1 ); + setLinearSequence( y ); + setConstantSequence( z, 2 ); + x.addVectors( y, 3.0, z, 
1.0, 2.0 ); + EXPECT_EQ( x, expected2 ); + + setConstantSequence( x, 1 ); + setLinearSequence( y ); + setConstantSequence( z, 2 ); + x_view.addVectors( y_view, 3.0, z_view, 1.0, 2.0 ); + EXPECT_EQ( x, expected2 ); + + // multiplication by floating-point scalars which produces integer values + setConstantSequence( x, 2 ); + setConstantSequence( y, 4 ); + setConstantSequence( z, 6 ); + x.addVectors( y, 2.5, z, -1.5, -1.5 ); + EXPECT_EQ( x.min(), -2 ); + EXPECT_EQ( x.max(), -2 ); +} + +TYPED_TEST( VectorTest, prefixSum ) +{ + using VectorType = typename TestFixture::VectorType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType v; + v.setSize( size ); + ViewType v_view( v ); + + setConstantSequence( v, 1 ); + v.computePrefixSum(); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), i + 1 ); + + v.setValue( 0 ); + v.computePrefixSum(); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), 0 ); + + setLinearSequence( v ); + v.computePrefixSum(); + for( int i = 1; i < size; i++ ) + EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i ); + + setConstantSequence( v, 1 ); + v_view.computePrefixSum(); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), i + 1 ); + + v.setValue( 0 ); + v_view.computePrefixSum(); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), 0 ); + + setLinearSequence( v ); + v_view.computePrefixSum(); + for( int i = 1; i < size; i++ ) + EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i ); +} + +TYPED_TEST( VectorTest, exclusivePrefixSum ) +{ + using VectorType = typename TestFixture::VectorType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType v; + v.setSize( size ); + ViewType v_view( v ); + + setConstantSequence( v, 1 ); + v.computeExclusivePrefixSum(); + 
for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), i ); + + v.setValue( 0 ); + v.computeExclusivePrefixSum(); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), 0 ); + + setLinearSequence( v ); + v.computeExclusivePrefixSum(); + for( int i = 1; i < size; i++ ) + EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i - 1 ); + + setConstantSequence( v, 1 ); + v_view.computeExclusivePrefixSum(); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), i ); + + v.setValue( 0 ); + v_view.computeExclusivePrefixSum(); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), 0 ); + + setLinearSequence( v ); + v_view.computeExclusivePrefixSum(); + for( int i = 1; i < size; i++ ) + EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i - 1 ); +} + +TYPED_TEST( VectorTest, abs ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + u.setElement( i, i ); + + v = -u; + EXPECT_EQ( abs( v ), u ); +} + +#endif // HAVE_GTEST + + +#include "../GtestMissingError.h" +int main( int argc, char* argv[] ) +{ + //Test(); + //return 0; +#ifdef HAVE_GTEST + ::testing::InitGoogleTest( &argc, argv ); + return RUN_ALL_TESTS(); +#else + throw GtestMissingError(); +#endif +} diff --git a/src/UnitTests/Containers/VectorTest-5.cpp b/src/UnitTests/Containers/VectorTest-5.cpp new file mode 100644 index 000000000..06b1e03a9 --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-5.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + VectorTest-5.cpp - description + ------------------- + begin : Apr 30, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + 
+#include "VectorTest-5.h" diff --git a/src/UnitTests/Containers/VectorTest-5.cu b/src/UnitTests/Containers/VectorTest-5.cu new file mode 100644 index 000000000..31eecc8c7 --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-5.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + VectorTest-5.cu - description + ------------------- + begin : Apr 30, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "VectorTest-5.h" diff --git a/src/UnitTests/Containers/VectorTest-5.h b/src/UnitTests/Containers/VectorTest-5.h new file mode 100644 index 000000000..6d4bc218d --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-5.h @@ -0,0 +1,564 @@ +/*************************************************************************** + VectorTest.h - description + ------------------- + begin : Oct 25, 2010 + copyright : (C) 2010 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +// NOTE: Vector = Array + VectorOperations, so we test Vector and VectorOperations at the same time + +#pragma once + +#ifdef HAVE_GTEST +#include + +#include +#include +#include +#include "VectorTestSetup.h" + +#include "gtest/gtest.h" + +using namespace TNL; +using namespace TNL::Containers; +using namespace TNL::Containers::Algorithms; +using namespace TNL::Arithmetics; + +// should be small enough to have fast tests, but larger than minGPUReductionDataSize +// and large enough to require multiple CUDA blocks for reduction +constexpr int VECTOR_TEST_SIZE = 5000; + +TYPED_TEST( VectorTest, sin ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int 
size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) i - ( RealType ) size / 2 ); + v.setElement( i, TNL::sin( ( RealType ) i - ( RealType ) size / 2 ) ); + } + + //EXPECT_EQ( sin( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( sin( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + +TYPED_TEST( VectorTest, cos ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) i - ( RealType ) size / 2 ); + v.setElement( i, TNL::cos( ( RealType ) i - ( RealType ) size / 2 ) ); + } + + //EXPECT_EQ( cos( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( cos( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + +TYPED_TEST( VectorTest, tan ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + const double h = 10.0 / size; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + const RealType x = -5.0 + i * h; + u.setElement( i, x ); + v.setElement( i, TNL::tan( x ) ); + } + + //EXPECT_EQ( tan( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( tan( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); + +} + +TYPED_TEST( VectorTest, sqrt ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) i ); 
+ v.setElement( i, TNL::sqrt( ( RealType ) i ) ); + } + + //EXPECT_EQ( sqrt( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( sqrt( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); + +} + +TYPED_TEST( VectorTest, cbrt ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) i ); + v.setElement( i, TNL::cbrt( ( RealType ) i ) ); + } + + //EXPECT_EQ( cbrt( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( cbrt( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); + +} + +TYPED_TEST( VectorTest, pow ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ), _w( size ); + ViewType u( _u ), v( _v ), w( _w ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) i - ( RealType ) size / 2 ); + v.setElement( i, TNL::pow( ( RealType ) i - ( RealType ) size / 2, 2.0 ) ); + w.setElement( i, TNL::pow( ( RealType ) i - ( RealType ) size / 2, 3.0 ) ); + } + + //EXPECT_EQ( pow( u, 2.0 ), v ); + //EXPECT_EQ( pow( u, 3.0 ), w ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( pow( u, 2.0 ).getElement( i ), v.getElement( i ), 1.0e-6 ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( pow( u, 3.0 ).getElement( i ), w.getElement( i ), 1.0e-6 ); + + +} + +TYPED_TEST( VectorTest, floor ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( 
RealType ) i - ( RealType ) size / 2 ); + v.setElement( i, TNL::floor( ( RealType ) i - ( RealType ) size / 2 ) ); + } + + //EXPECT_EQ( floor( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( floor( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); + +} + +TYPED_TEST( VectorTest, ceil ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) i - ( RealType ) size / 2 ); + v.setElement( i, TNL::ceil( ( RealType ) i - ( RealType ) size / 2 ) ); + } + + //EXPECT_EQ( ceil( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( ceil( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); + +} + +TYPED_TEST( VectorTest, acos ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType )( i - size / 2 ) / ( RealType ) size ); + v.setElement( i, TNL::acos( ( RealType )( i - size / 2 ) / ( RealType ) size ) ); + } + + //EXPECT_EQ( acos( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( acos( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + +TYPED_TEST( VectorTest, asin ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) ( i - size / 2 ) / ( RealType ) size ); + v.setElement( i, TNL::asin( ( RealType )( i - size / 2 ) / ( RealType ) 
size ) ); + } + + //EXPECT_EQ( asin( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( asin( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + +TYPED_TEST( VectorTest, atan ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) i - ( RealType ) size / 2 ); + v.setElement( i, TNL::atan( ( RealType ) i - ( RealType ) size / 2 ) ); + } + + //EXPECT_EQ( atan( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( atan( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + +TYPED_TEST( VectorTest, cosh ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + RealType h = 2.0 / ( RealType ) size; + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, i * h - ( RealType ) 1.0 ); + v.setElement( i, TNL::cosh( i * h - ( RealType ) 1.0 ) ); + } + + // EXPECT_EQ( cosh( u ), v ) does not work here for float, maybe because + // of some fast-math optimization + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( cosh( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + +TYPED_TEST( VectorTest, tanh ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) i - ( RealType ) size / 2 ); + v.setElement( i, TNL::tanh( ( RealType ) i - ( RealType ) size / 2 ) ); + } + + //EXPECT_EQ( tanh( u ), v ); + for( int i 
= 0; i < size; i++ ) + EXPECT_NEAR( tanh( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + +TYPED_TEST( VectorTest, log ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) i + 1 ); + v.setElement( i, TNL::log( ( RealType ) i + 1 ) ); + } + + //EXPECT_EQ( log( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( log( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); + +} + +TYPED_TEST( VectorTest, log10 ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) i + 1 ); + v.setElement( i, TNL::log10( ( RealType ) i + 1 ) ); + } + + // EXPECT_EQ( log10( u ), v ) does not work here for float, maybe because + // of some fast-math optimization + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( log10( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + +TYPED_TEST( VectorTest, log2 ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) i + 1 ); + v.setElement( i, TNL::log2( ( RealType ) i + 1 ) ); + } + + //EXPECT_EQ( log2( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( log2( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); + +} + +TYPED_TEST( VectorTest, exp ) +{ + using VectorType = typename TestFixture::VectorType; + 
using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + const double h = 10.0 / size; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + const RealType x = -5.0 + i * h; + u.setElement( i, x ); + v.setElement( i, TNL::exp( x ) ); + } + + //EXPECT_EQ( exp( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( exp( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + +TYPED_TEST( VectorTest, sign ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) i - ( RealType ) size / 2 ); + v.setElement( i, TNL::sign( ( RealType ) i - ( RealType ) size / 2 ) ); + } + + //EXPECT_EQ( sign( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( sign( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + +// TODO: test prefix sum with custom begin and end parameters + +TEST( VectorSpecialCasesTest, sumOfBoolVector ) +{ + using VectorType = Containers::Vector< bool, Devices::Host >; + using ViewType = VectorView< bool, Devices::Host >; + const float epsilon = 64 * std::numeric_limits< float >::epsilon(); + + VectorType v( 512 ), w( 512 ); + ViewType v_view( v ), w_view( w ); + v.setValue( true ); + w.setValue( false ); + + const int sum = v.sum< int >(); + const int l1norm = v.lpNorm< int >( 1.0 ); + const float l2norm = v.lpNorm< float >( 2.0 ); + const float l3norm = v.lpNorm< float >( 3.0 ); + EXPECT_EQ( sum, 512 ); + EXPECT_EQ( l1norm, 512 ); + EXPECT_NEAR( l2norm, std::sqrt( 512 ), epsilon ); + EXPECT_NEAR( l3norm, std::cbrt( 512 ), epsilon ); + + const int diff_sum = v.differenceSum< int >( w ); + const int diff_l1norm = v.differenceLpNorm< int >( w, 
1.0 ); + const float diff_l2norm = v.differenceLpNorm< float >( w, 2.0 ); + const float diff_l3norm = v.differenceLpNorm< float >( w, 3.0 ); + EXPECT_EQ( diff_sum, 512 ); + EXPECT_EQ( diff_l1norm, 512 ); + EXPECT_NEAR( diff_l2norm, std::sqrt( 512 ), epsilon ); + EXPECT_NEAR( diff_l3norm, std::cbrt( 512 ), epsilon ); + + // test views + const int sum_view = v_view.sum< int >(); + const int l1norm_view = v_view.lpNorm< int >( 1.0 ); + const float l2norm_view = v_view.lpNorm< float >( 2.0 ); + const float l3norm_view = v_view.lpNorm< float >( 3.0 ); + EXPECT_EQ( sum_view, 512 ); + EXPECT_EQ( l1norm_view, 512 ); + EXPECT_NEAR( l2norm_view, std::sqrt( 512 ), epsilon ); + EXPECT_NEAR( l3norm_view, std::cbrt( 512 ), epsilon ); + + const int diff_sum_view = v_view.differenceSum< int >( w_view ); + const int diff_l1norm_view = v_view.differenceLpNorm< int >( w_view, 1.0 ); + const float diff_l2norm_view = v_view.differenceLpNorm< float >( w_view, 2.0 ); + const float diff_l3norm_view = v_view.differenceLpNorm< float >( w_view, 3.0 ); + EXPECT_EQ( diff_sum_view, 512 ); + EXPECT_EQ( diff_l1norm_view, 512 ); + EXPECT_NEAR( diff_l2norm_view, std::sqrt( 512 ), epsilon ); + EXPECT_NEAR( diff_l3norm_view, std::cbrt( 512 ), epsilon ); +} + +TEST( VectorSpecialCasesTest, assignmentThroughView ) +{ + using VectorType = Containers::Vector< int, Devices::Host >; + using ViewType = VectorView< int, Devices::Host >; + + static_assert( Algorithms::Details::HasSubscriptOperator< VectorType >::value, "Subscript operator detection by SFINAE does not work for Vector." ); + static_assert( Algorithms::Details::HasSubscriptOperator< ViewType >::value, "Subscript operator detection by SFINAE does not work for VectorView." 
); + + VectorType u( 100 ), v( 100 ); + ViewType u_view( u ), v_view( v ); + + u.setValue( 42 ); + v.setValue( 0 ); + v_view = u_view; + EXPECT_EQ( u_view.getData(), u.getData() ); + EXPECT_EQ( v_view.getData(), v.getData() ); + for( int i = 0; i < 100; i++ ) + EXPECT_EQ( v_view[ i ], 42 ); + + u.setValue( 42 ); + v.setValue( 0 ); + v_view = u; + EXPECT_EQ( u_view.getData(), u.getData() ); + EXPECT_EQ( v_view.getData(), v.getData() ); + for( int i = 0; i < 100; i++ ) + EXPECT_EQ( v_view[ i ], 42 ); +} + +TEST( VectorSpecialCasesTest, operationsOnConstView ) +{ + using VectorType = Containers::Vector< int, Devices::Host >; + using ViewType = VectorView< const int, Devices::Host >; + + VectorType u( 100 ), v( 100 ); + ViewType u_view( u ), v_view( v ); + + u.setValue( 1 ); + v.setValue( 1 ); + + EXPECT_EQ( u_view.max(), 1 ); + EXPECT_EQ( u_view.min(), 1 ); + EXPECT_EQ( u_view.absMax(), 1 ); + EXPECT_EQ( u_view.absMin(), 1 ); + EXPECT_EQ( u_view.lpNorm( 1 ), 100 ); + EXPECT_EQ( u_view.differenceMax( v_view ), 0 ); + EXPECT_EQ( u_view.differenceMin( v_view ), 0 ); + EXPECT_EQ( u_view.differenceAbsMax( v_view ), 0 ); + EXPECT_EQ( u_view.differenceAbsMin( v_view ), 0 ); + EXPECT_EQ( u_view.differenceLpNorm( v_view, 1 ), 0 ); + EXPECT_EQ( u_view.differenceSum( v_view ), 0 ); + EXPECT_EQ( u_view.scalarProduct( v_view ), 100 ); +} + +TEST( VectorSpecialCasesTest, initializationOfVectorViewByArrayView ) +{ + using ArrayType = Containers::Array< int, Devices::Host >; + using VectorViewType = VectorView< const int, Devices::Host >; + using ArrayViewType = ArrayView< int, Devices::Host >; + + ArrayType a( 100 ); + a.setValue( 0 ); + ArrayViewType a_view( a ); + + VectorViewType v_view( a_view ); + EXPECT_EQ( v_view.getData(), a_view.getData() ); + EXPECT_EQ( v_view.sum(), 0 ); +} + +TEST( VectorSpecialCasesTest, defaultConstructors ) +{ + using ArrayType = Containers::Array< int, Devices::Host >; + using VectorViewType = VectorView< int, Devices::Host >; + using ArrayViewType = 
ArrayView< int, Devices::Host >; + + ArrayType a( 100 ); + a.setValue( 0 ); + + ArrayViewType a_view; + a_view.bind( a ); + + VectorViewType v_view; + v_view.bind( a ); + EXPECT_EQ( v_view.getData(), a_view.getData() ); +} + +#endif // HAVE_GTEST + + +#include "../GtestMissingError.h" +int main( int argc, char* argv[] ) +{ + //Test(); + //return 0; +#ifdef HAVE_GTEST + ::testing::InitGoogleTest( &argc, argv ); + return RUN_ALL_TESTS(); +#else + throw GtestMissingError(); +#endif +} diff --git a/src/UnitTests/Containers/VectorTest.h b/src/UnitTests/Containers/VectorTest.h deleted file mode 100644 index 737b07965..000000000 --- a/src/UnitTests/Containers/VectorTest.h +++ /dev/null @@ -1,1194 +0,0 @@ -/*************************************************************************** - VectorTest.h - description - ------------------- - begin : Oct 25, 2010 - copyright : (C) 2010 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -// NOTE: Vector = Array + VectorOperations, so we test Vector and VectorOperations at the same time - -#pragma once - -#ifdef HAVE_GTEST -#include - -#include -#include -#include - -#include "gtest/gtest.h" - -using namespace TNL; -using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; -using namespace TNL::Arithmetics; - -// should be small enough to have fast tests, but larger than minGPUReductionDataSize -// and large enough to require multiple CUDA blocks for reduction -constexpr int VECTOR_TEST_SIZE = 5000; - - -template< typename Vector > -void setLinearSequence( Vector& deviceVector ) -{ - typename Vector::HostType a; - a.setLike( deviceVector ); - for( int i = 0; i < a.getSize(); i++ ) - a[ i ] = i; - deviceVector = a; -} - -template< typename Vector > -void setConstantSequence( Vector& deviceVector, - typename Vector::RealType v ) -{ - deviceVector.setValue( v ); -} - -template< 
typename Vector > -void setNegativeLinearSequence( Vector& deviceVector ) -{ - typename Vector::HostType a; - a.setLike( deviceVector ); - for( int i = 0; i < a.getSize(); i++ ) - a[ i ] = -i; - deviceVector = a; -} - -template< typename Vector > -void setOscilatingSequence( Vector& deviceVector, - typename Vector::RealType v ) -{ - typename Vector::HostType a; - a.setLike( deviceVector ); - a[ 0 ] = v; - for( int i = 1; i < a.getSize(); i++ ) - a[ i ] = a[ i-1 ] * -1; - deviceVector = a; -} - - -// TODO: test everything with OpenMP with different number of threads - -// test fixture for typed tests -template< typename Vector > -class VectorTest : public ::testing::Test -{ -protected: - using VectorType = Vector; - using VectorOperations = Algorithms::VectorOperations< typename VectorType::DeviceType >; - using ViewType = VectorView< typename Vector::RealType, typename Vector::DeviceType, typename Vector::IndexType >; -}; - -// types for which VectorTest is instantiated -// TODO: Quad must be fixed -using VectorTypes = ::testing::Types< -#ifndef HAVE_CUDA - Vector< int, Devices::Host, short >, - Vector< long, Devices::Host, short >, - Vector< float, Devices::Host, short >, - Vector< double, Devices::Host, short >, - //Vector< Quad< float >, Devices::Host, short >, - //Vector< Quad< double >, Devices::Host, short >, - Vector< int, Devices::Host, int >, - Vector< long, Devices::Host, int >, - Vector< float, Devices::Host, int >, - Vector< double, Devices::Host, int >, - //Vector< Quad< float >, Devices::Host, int >, - //Vector< Quad< double >, Devices::Host, int >, - Vector< int, Devices::Host, long >, - Vector< long, Devices::Host, long >, - Vector< float, Devices::Host, long >, - Vector< double, Devices::Host, long > - //Vector< Quad< float >, Devices::Host, long >, - //Vector< Quad< double >, Devices::Host, long > -#endif -#ifdef HAVE_CUDA - Vector< int, Devices::Cuda, short >, - Vector< long, Devices::Cuda, short >, - Vector< float, Devices::Cuda, short >, - 
Vector< double, Devices::Cuda, short >, - //Vector< Quad< float >, Devices::Cuda, short >, - //Vector< Quad< double >, Devices::Cuda, short >, - Vector< int, Devices::Cuda, int >, - Vector< long, Devices::Cuda, int >, - Vector< float, Devices::Cuda, int >, - Vector< double, Devices::Cuda, int >, - //Vector< Quad< float >, Devices::Cuda, int >, - //Vector< Quad< double >, Devices::Cuda, int >, - Vector< int, Devices::Cuda, long >, - Vector< long, Devices::Cuda, long >, - Vector< float, Devices::Cuda, long >, - Vector< double, Devices::Cuda, long > - //Vector< Quad< float >, Devices::Cuda, long >, - //Vector< Quad< double >, Devices::Cuda, long > -#endif -#ifdef HAVE_MIC - , - Vector< int, Devices::MIC, short >, - Vector< long, Devices::MIC, short >, - Vector< float, Devices::MIC, short >, - Vector< double, Devices::MIC, short >, - Vector< int, Devices::MIC, int >, - Vector< long, Devices::MIC, int >, - Vector< float, Devices::MIC, int >, - Vector< double, Devices::MIC, int >, - Vector< int, Devices::MIC, long >, - Vector< long, Devices::MIC, long >, - Vector< float, Devices::MIC, long >, - Vector< double, Devices::MIC, long > -#endif ->; - -TYPED_TEST_SUITE( VectorTest, VectorTypes ); - -TYPED_TEST( VectorTest, constructors ) -{ - using VectorType = typename TestFixture::VectorType; - const int size = VECTOR_TEST_SIZE; - - // TODO: Does not work yet. 
- /*VectorType empty_u; - VectorType empty_v( empty_u ); - EXPECT_EQ( empty_u.getSize(), 0 ); - EXPECT_EQ( empty_v.getSize(), 0 );*/ - - VectorType u( size ); - EXPECT_EQ( u.getSize(), size ); - - VectorType v( 10 ); - EXPECT_EQ( v.getSize(), 10 ); - - if( std::is_same< typename VectorType::DeviceType, Devices::Host >::value ) { - typename VectorType::ValueType data[ 10 ]; - VectorType w( data, 10 ); - EXPECT_EQ( w.getData(), data ); - - VectorType z1( w ); - //EXPECT_EQ( z1.getData(), data ); - EXPECT_EQ( z1.getSize(), 10 ); - - VectorType z2( w, 1 ); - EXPECT_EQ( z2.getData(), data + 1 ); - EXPECT_EQ( z2.getSize(), 9 ); - - VectorType z3( w, 2, 3 ); - EXPECT_EQ( z3.getData(), data + 2 ); - EXPECT_EQ( z3.getSize(), 3 ); - } - - VectorType w( v ); - EXPECT_EQ( w.getSize(), v.getSize() ); - for( int i = 0; i < 10; i++ ) - EXPECT_EQ( v.getElement( i ), w.getElement( i ) ); - v.reset(); - EXPECT_EQ( w.getSize(), 10 ); - - VectorType a1 { 1, 2, 3 }; - EXPECT_EQ( a1.getElement( 0 ), 1 ); - EXPECT_EQ( a1.getElement( 1 ), 2 ); - EXPECT_EQ( a1.getElement( 2 ), 3 ); - - std::list< int > l = { 4, 5, 6 }; - VectorType a2( l ); - EXPECT_EQ( a2.getElement( 0 ), 4 ); - EXPECT_EQ( a2.getElement( 1 ), 5 ); - EXPECT_EQ( a2.getElement( 2 ), 6 ); - - std::vector< int > q = { 7, 8, 9 }; - - VectorType a3( q ); - EXPECT_EQ( a3.getElement( 0 ), 7 ); - EXPECT_EQ( a3.getElement( 1 ), 8 ); - EXPECT_EQ( a3.getElement( 2 ), 9 ); -} - -TYPED_TEST( VectorTest, max ) -{ - using VectorType = typename TestFixture::VectorType; - using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType v; - v.setSize( size ); - ViewType v_view( v ); - setLinearSequence( v ); - - EXPECT_EQ( v.max(), size - 1 ); - EXPECT_EQ( v_view.max(), size - 1 ); - EXPECT_EQ( VectorOperations::getVectorMax( v ), size - 1 ); -} - -TYPED_TEST( VectorTest, min ) -{ - using VectorType = typename TestFixture::VectorType; - 
using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType v; - v.setSize( size ); - ViewType v_view( v ); - setLinearSequence( v ); - - EXPECT_EQ( v.min(), 0 ); - EXPECT_EQ( v_view.min(), 0 ); - EXPECT_EQ( VectorOperations::getVectorMin( v ), 0 ); -} - -TYPED_TEST( VectorTest, absMax ) -{ - using VectorType = typename TestFixture::VectorType; - using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType v; - v.setSize( size ); - ViewType v_view( v ); - setNegativeLinearSequence( v ); - - EXPECT_EQ( v.absMax(), size - 1 ); - EXPECT_EQ( v_view.absMax(), size - 1 ); - EXPECT_EQ( VectorOperations::getVectorAbsMax( v ), size - 1 ); -} - -TYPED_TEST( VectorTest, absMin ) -{ - using VectorType = typename TestFixture::VectorType; - using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType v; - v.setSize( size ); - ViewType v_view( v ); - setNegativeLinearSequence( v ); - - EXPECT_EQ( v.absMin(), 0 ); - EXPECT_EQ( v_view.absMin(), 0 ); - EXPECT_EQ( VectorOperations::getVectorAbsMin( v ), 0 ); -} - -TYPED_TEST( VectorTest, lpNorm ) -{ - using VectorType = typename TestFixture::VectorType; - using RealType = typename VectorType::RealType; - using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - const RealType epsilon = 64 * std::numeric_limits< RealType >::epsilon(); - - VectorType v; - v.setSize( size ); - ViewType v_view( v ); - setConstantSequence( v, 1 ); - - const RealType expectedL1norm = size; - const RealType expectedL2norm = std::sqrt( size ); - const RealType expectedL3norm = std::cbrt( size ); - EXPECT_EQ( v.lpNorm( 1.0 ), expectedL1norm ); - 
EXPECT_EQ( v.lpNorm( 2.0 ), expectedL2norm ); - EXPECT_NEAR( v.lpNorm( 3.0 ), expectedL3norm, epsilon ); - EXPECT_EQ( v_view.lpNorm( 1.0 ), expectedL1norm ); - EXPECT_EQ( v_view.lpNorm( 2.0 ), expectedL2norm ); - EXPECT_NEAR( v_view.lpNorm( 3.0 ), expectedL3norm, epsilon ); - EXPECT_EQ( VectorOperations::getVectorLpNorm( v, 1.0 ), expectedL1norm ); - EXPECT_EQ( VectorOperations::getVectorLpNorm( v, 2.0 ), expectedL2norm ); - EXPECT_NEAR( VectorOperations::getVectorLpNorm( v, 3.0 ), expectedL3norm, epsilon ); -} - -TYPED_TEST( VectorTest, sum ) -{ - using VectorType = typename TestFixture::VectorType; - using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; - // this test expect an even size - const int size = VECTOR_TEST_SIZE % 2 ? VECTOR_TEST_SIZE - 1 : VECTOR_TEST_SIZE; - - VectorType v; - v.setSize( size ); - ViewType v_view( v ); - - setConstantSequence( v, 1 ); - EXPECT_EQ( v.sum(), size ); - EXPECT_EQ( v_view.sum(), size ); - EXPECT_EQ( VectorOperations::getVectorSum( v ), size ); - - setLinearSequence( v ); - EXPECT_EQ( v.sum(), 0.5 * size * ( size - 1 ) ); - EXPECT_EQ( v_view.sum(), 0.5 * size * ( size - 1 ) ); - EXPECT_EQ( VectorOperations::getVectorSum( v ), 0.5 * size * ( size - 1 ) ); - - setNegativeLinearSequence( v ); - EXPECT_EQ( v.sum(), - 0.5 * size * ( size - 1 ) ); - EXPECT_EQ( v_view.sum(), - 0.5 * size * ( size - 1 ) ); - EXPECT_EQ( VectorOperations::getVectorSum( v ), - 0.5 * size * ( size - 1 ) ); - - setOscilatingSequence( v, 1.0 ); - EXPECT_EQ( v.sum(), 0 ); - EXPECT_EQ( v_view.sum(), 0 ); - EXPECT_EQ( VectorOperations::getVectorSum( v ), 0 ); -} - -TYPED_TEST( VectorTest, differenceMax ) -{ - using VectorType = typename TestFixture::VectorType; - using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType u( size ), v( size ); - ViewType u_view( u ), v_view( v ); - 
setLinearSequence( u ); - setConstantSequence( v, size / 2 ); - - EXPECT_EQ( u.differenceMax( v ), size - 1 - size / 2 ); - EXPECT_EQ( u_view.differenceMax( v_view ), size - 1 - size / 2 ); - EXPECT_EQ( VectorOperations::getVectorDifferenceMax( u, v ), size - 1 - size / 2 ); -} - -TYPED_TEST( VectorTest, differenceMin ) -{ - using VectorType = typename TestFixture::VectorType; - using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType u( size ), v( size ); - ViewType u_view( u ), v_view( v ); - setLinearSequence( u ); - setConstantSequence( v, size / 2 ); - - EXPECT_EQ( u.differenceMin( v ), - size / 2 ); - EXPECT_EQ( u_view.differenceMin( v_view ), - size / 2 ); - EXPECT_EQ( VectorOperations::getVectorDifferenceMin( u, v ), - size / 2 ); - EXPECT_EQ( v.differenceMin( u ), size / 2 - size + 1 ); - EXPECT_EQ( v_view.differenceMin( u_view ), size / 2 - size + 1 ); - EXPECT_EQ( VectorOperations::getVectorDifferenceMin( v, u ), size / 2 - size + 1 ); -} - -TYPED_TEST( VectorTest, differenceAbsMax ) -{ - using VectorType = typename TestFixture::VectorType; - using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; - // this test expects an odd size - const int size = VECTOR_TEST_SIZE % 2 ? 
VECTOR_TEST_SIZE : VECTOR_TEST_SIZE - 1; - - VectorType u( size ), v( size ); - ViewType u_view( u ), v_view( v ); - setNegativeLinearSequence( u ); - setConstantSequence( v, - size / 2 ); - - EXPECT_EQ( u.differenceAbsMax( v ), size - 1 - size / 2 ); - EXPECT_EQ( u_view.differenceAbsMax( v_view ), size - 1 - size / 2 ); - EXPECT_EQ( VectorOperations::getVectorDifferenceAbsMax( u, v ), size - 1 - size / 2 ); -} - -TYPED_TEST( VectorTest, differenceAbsMin ) -{ - using VectorType = typename TestFixture::VectorType; - using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType u( size ), v( size ); - ViewType u_view( u ), v_view( v ); - setNegativeLinearSequence( u ); - setConstantSequence( v, - size / 2 ); - - EXPECT_EQ( u.differenceAbsMin( v ), 0 ); - EXPECT_EQ( u_view.differenceAbsMin( v_view ), 0 ); - EXPECT_EQ( VectorOperations::getVectorDifferenceAbsMin( u, v ), 0 ); - EXPECT_EQ( v.differenceAbsMin( u ), 0 ); - EXPECT_EQ( v_view.differenceAbsMin( u_view ), 0 ); - EXPECT_EQ( VectorOperations::getVectorDifferenceAbsMin( v, u ), 0 ); -} - -TYPED_TEST( VectorTest, differenceLpNorm ) -{ - using VectorType = typename TestFixture::VectorType; - using RealType = typename VectorType::RealType; - using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - const RealType epsilon = 64 * std::numeric_limits< RealType >::epsilon(); - - VectorType u( size ), v( size ); - ViewType u_view( u ), v_view( v ); - u.setValue( 3.0 ); - v.setValue( 1.0 ); - - const RealType expectedL1norm = 2.0 * size; - const RealType expectedL2norm = std::sqrt( 4.0 * size ); - const RealType expectedL3norm = std::cbrt( 8.0 * size ); - EXPECT_EQ( u.differenceLpNorm( v, 1.0 ), expectedL1norm ); - EXPECT_EQ( u.differenceLpNorm( v, 2.0 ), expectedL2norm ); - EXPECT_NEAR( u.differenceLpNorm( v, 3.0 ), 
expectedL3norm, epsilon ); - EXPECT_EQ( u_view.differenceLpNorm( v_view, 1.0 ), expectedL1norm ); - EXPECT_EQ( u_view.differenceLpNorm( v_view, 2.0 ), expectedL2norm ); - EXPECT_NEAR( u_view.differenceLpNorm( v_view, 3.0 ), expectedL3norm, epsilon ); - EXPECT_EQ( VectorOperations::getVectorDifferenceLpNorm( u, v, 1.0 ), expectedL1norm ); - EXPECT_EQ( VectorOperations::getVectorDifferenceLpNorm( u, v, 2.0 ), expectedL2norm ); - EXPECT_NEAR( VectorOperations::getVectorDifferenceLpNorm( u, v, 3.0 ), expectedL3norm, epsilon ); -} - -TYPED_TEST( VectorTest, differenceSum ) -{ - using VectorType = typename TestFixture::VectorType; - using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; - // this test expect an even size - const int size = VECTOR_TEST_SIZE % 2 ? VECTOR_TEST_SIZE - 1 : VECTOR_TEST_SIZE; - - VectorType u( size ), v( size ); - ViewType u_view( u ), v_view( v ); - v.setValue( 1.0 ); - - setConstantSequence( u, 2 ); - EXPECT_EQ( u.differenceSum( v ), size ); - EXPECT_EQ( u_view.differenceSum( v_view ), size ); - EXPECT_EQ( VectorOperations::getVectorDifferenceSum( u, v ), size ); - - setLinearSequence( u ); - EXPECT_EQ( u.differenceSum( v ), 0.5 * size * ( size - 1 ) - size ); - EXPECT_EQ( u_view.differenceSum( v_view ), 0.5 * size * ( size - 1 ) - size ); - EXPECT_EQ( VectorOperations::getVectorDifferenceSum( u, v ), 0.5 * size * ( size - 1 ) - size ); - - setNegativeLinearSequence( u ); - EXPECT_EQ( u.differenceSum( v ), - 0.5 * size * ( size - 1 ) - size ); - EXPECT_EQ( u_view.differenceSum( v_view ), - 0.5 * size * ( size - 1 ) - size ); - EXPECT_EQ( VectorOperations::getVectorDifferenceSum( u, v ), - 0.5 * size * ( size - 1 ) - size ); - - setOscilatingSequence( u, 1.0 ); - EXPECT_EQ( u.differenceSum( v ), - size ); - EXPECT_EQ( u_view.differenceSum( v_view ), - size ); - EXPECT_EQ( VectorOperations::getVectorDifferenceSum( u, v ), - size ); -} - -TYPED_TEST( VectorTest, scalarMultiplication ) 
-{ - using VectorType = typename TestFixture::VectorType; - using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType u( size ); - ViewType u_view( u ); - - typename VectorType::HostType expected; - expected.setSize( size ); - for( int i = 0; i < size; i++ ) - expected[ i ] = 2.0 * i; - - setLinearSequence( u ); - VectorOperations::vectorScalarMultiplication( u, 2.0 ); - EXPECT_EQ( u, expected ); - - setLinearSequence( u ); - u.scalarMultiplication( 2.0 ); - EXPECT_EQ( u, expected ); - - setLinearSequence( u ); - u_view.scalarMultiplication( 2.0 ); - EXPECT_EQ( u, expected ); - - setLinearSequence( u ); - u *= 2.0; - EXPECT_EQ( u, expected ); - - setLinearSequence( u ); - u_view *= 2.0; - EXPECT_EQ( u, expected ); -} - -TYPED_TEST( VectorTest, scalarProduct ) -{ - using VectorType = typename TestFixture::VectorType; - using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; - // this test expects an odd size - const int size = VECTOR_TEST_SIZE % 2 ? 
VECTOR_TEST_SIZE : VECTOR_TEST_SIZE - 1; - - VectorType u( size ), v( size ); - ViewType u_view( u ), v_view( v ); - setOscilatingSequence( u, 1.0 ); - setConstantSequence( v, 1 ); - - EXPECT_EQ( u.scalarProduct( v ), 1.0 ); - EXPECT_EQ( u_view.scalarProduct( v_view ), 1.0 ); - EXPECT_EQ( VectorOperations::getScalarProduct( u, v ), 1.0 ); -} - -TYPED_TEST( VectorTest, addVector ) -{ - using VectorType = typename TestFixture::VectorType; - using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType x, y; - x.setSize( size ); - y.setSize( size ); - ViewType x_view( x ), y_view( y ); - - typename VectorType::HostType expected1, expected2; - expected1.setSize( size ); - expected2.setSize( size ); - for( int i = 0; i < size; i++ ) { - expected1[ i ] = 2.0 + 3.0 * i; - expected2[ i ] = 1.0 + 3.0 * i; - } - - setConstantSequence( x, 1 ); - setLinearSequence( y ); - VectorOperations::addVector( x, y, 3.0, 2.0 ); - EXPECT_EQ( x, expected1 ); - - setConstantSequence( x, 1 ); - setLinearSequence( y ); - x.addVector( y, 3.0, 1.0 ); - EXPECT_EQ( x, expected2 ); - - setConstantSequence( x, 1 ); - setLinearSequence( y ); - x_view.addVector( y_view, 3.0, 1.0 ); - EXPECT_EQ( x, expected2 ); - - // multiplication by floating-point scalars which produces integer values - setConstantSequence( x, 2 ); - setConstantSequence( y, 4 ); - x.addVector( y, 2.5, -1.5 ); - EXPECT_EQ( x.min(), 7 ); - EXPECT_EQ( x.max(), 7 ); -} - -TYPED_TEST( VectorTest, addVectors ) -{ - using VectorType = typename TestFixture::VectorType; - using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType x, y, z; - x.setSize( size ); - y.setSize( size ); - z.setSize( size ); - ViewType x_view( x ), y_view( y ), z_view( z ); - - typename VectorType::HostType expected1, expected2; - expected1.setSize( 
size ); - expected2.setSize( size ); - for( int i = 0; i < size; i++ ) { - expected1[ i ] = 1.0 + 3.0 * i + 2.0; - expected2[ i ] = 2.0 + 3.0 * i + 2.0; - } - - setConstantSequence( x, 1 ); - setLinearSequence( y ); - setConstantSequence( z, 2 ); - VectorOperations::addVectors( x, y, 3.0, z, 1.0, 1.0 ); - EXPECT_EQ( x, expected1 ); - - setConstantSequence( x, 1 ); - setLinearSequence( y ); - setConstantSequence( z, 2 ); - x.addVectors( y, 3.0, z, 1.0, 2.0 ); - EXPECT_EQ( x, expected2 ); - - setConstantSequence( x, 1 ); - setLinearSequence( y ); - setConstantSequence( z, 2 ); - x_view.addVectors( y_view, 3.0, z_view, 1.0, 2.0 ); - EXPECT_EQ( x, expected2 ); - - // multiplication by floating-point scalars which produces integer values - setConstantSequence( x, 2 ); - setConstantSequence( y, 4 ); - setConstantSequence( z, 6 ); - x.addVectors( y, 2.5, z, -1.5, -1.5 ); - EXPECT_EQ( x.min(), -2 ); - EXPECT_EQ( x.max(), -2 ); -} - -TYPED_TEST( VectorTest, prefixSum ) -{ - using VectorType = typename TestFixture::VectorType; - using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType v; - v.setSize( size ); - ViewType v_view( v ); - - setConstantSequence( v, 1 ); - v.computePrefixSum(); - for( int i = 0; i < size; i++ ) - EXPECT_EQ( v.getElement( i ), i + 1 ); - - v.setValue( 0 ); - v.computePrefixSum(); - for( int i = 0; i < size; i++ ) - EXPECT_EQ( v.getElement( i ), 0 ); - - setLinearSequence( v ); - v.computePrefixSum(); - for( int i = 1; i < size; i++ ) - EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i ); - - setConstantSequence( v, 1 ); - v_view.computePrefixSum(); - for( int i = 0; i < size; i++ ) - EXPECT_EQ( v.getElement( i ), i + 1 ); - - v.setValue( 0 ); - v_view.computePrefixSum(); - for( int i = 0; i < size; i++ ) - EXPECT_EQ( v.getElement( i ), 0 ); - - setLinearSequence( v ); - v_view.computePrefixSum(); - for( int i = 1; i < size; i++ ) - 
EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i ); -} - -TYPED_TEST( VectorTest, exclusivePrefixSum ) -{ - using VectorType = typename TestFixture::VectorType; - using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType v; - v.setSize( size ); - ViewType v_view( v ); - - setConstantSequence( v, 1 ); - v.computeExclusivePrefixSum(); - for( int i = 0; i < size; i++ ) - EXPECT_EQ( v.getElement( i ), i ); - - v.setValue( 0 ); - v.computeExclusivePrefixSum(); - for( int i = 0; i < size; i++ ) - EXPECT_EQ( v.getElement( i ), 0 ); - - setLinearSequence( v ); - v.computeExclusivePrefixSum(); - for( int i = 1; i < size; i++ ) - EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i - 1 ); - - setConstantSequence( v, 1 ); - v_view.computeExclusivePrefixSum(); - for( int i = 0; i < size; i++ ) - EXPECT_EQ( v.getElement( i ), i ); - - v.setValue( 0 ); - v_view.computeExclusivePrefixSum(); - for( int i = 0; i < size; i++ ) - EXPECT_EQ( v.getElement( i ), 0 ); - - setLinearSequence( v ); - v_view.computeExclusivePrefixSum(); - for( int i = 1; i < size; i++ ) - EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i - 1 ); -} - -TYPED_TEST( VectorTest, abs ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - u[ i ] = i; - - v = -u; - EXPECT_EQ( abs( v ), u ); -} - -TYPED_TEST( VectorTest, sin ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u[ i ] = i - size / 2; - v[ i ] = TNL::sin( u[ i ] ); - } - - EXPECT_EQ( sin( u ), v ); -} - -//// -// Performing all test 
leads to extremely long compilation time with nvcc -// TODO: Try to fix it somehow -/* -TYPED_TEST( VectorTest, cos ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u[ i ] = i - size / 2; - v[ i ] = TNL::cos( u[ i ] ); - } - - EXPECT_EQ( cos( u ), v ); -} - -TYPED_TEST( VectorTest, tan ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u[ i ] = i - size / 2; - v[ i ] = TNL::tan( u[ i ] ); - } - - EXPECT_EQ( tan( u ), v ); -} - -TYPED_TEST( VectorTest, sqrt ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u[ i ] = i; - v[ i ] = TNL::sqrt( u[ i ] ); - } - - EXPECT_EQ( sqrt( u ), v ); -} - -TYPED_TEST( VectorTest, cbrt ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u[ i ] = i; - v[ i ] = TNL::cbrt( u[ i ] ); - } - - EXPECT_EQ( cbrt( u ), v ); -} - -TYPED_TEST( VectorTest, pow ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - using RealType = typename VectorType::RealType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ), _w( size ); - ViewType u( _u ), v( _v ), w( _w ); - for( int i = 0; i < size; i++ ) - { - u[ i ] = i - size / 2; - v[ i ] = TNL::pow( u[ i ], 2.0 ); - 
w[ i ] = TNL::pow( u[ i ], 3.0 ); - } - - EXPECT_EQ( pow( u, 2.0 ), v ); - EXPECT_EQ( pow( u, 3.0 ), w ); -} - -TYPED_TEST( VectorTest, floor ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u[ i ] = i - size / 2; - v[ i ] = TNL::floor( u[ i ] ); - } - - EXPECT_EQ( floor( u ), v ); -} - -TYPED_TEST( VectorTest, ceil ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u[ i ] = i - size / 2; - v[ i ] = TNL::ceil( u[ i ] ); - } - - EXPECT_EQ( ceil( u ), v ); -} - -TYPED_TEST( VectorTest, acos ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u[ i ] = ( double )( i - size / 2 ) / ( double ) size; - v[ i ] = TNL::acos( u[ i ] ); - } - - EXPECT_EQ( acos( u ), v ); -} - -TYPED_TEST( VectorTest, asin ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u[ i ] = ( double ) ( i - size / 2 ) / ( double ) size; - v[ i ] = TNL::asin( u[ i ] ); - } - - EXPECT_EQ( asin( u ), v ); -} - -TYPED_TEST( VectorTest, atan ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u[ i ] = i 
- size / 2; - v[ i ] = TNL::atan( u[ i ] ); - } - - EXPECT_EQ( atan( u ), v ); -} - -TYPED_TEST( VectorTest, cosh ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - using RealType = typename VectorType::RealType; - const int size = VECTOR_TEST_SIZE; - - RealType h = 2.0 / ( RealType ) size; - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u[ i ] = i * h - ( RealType ) 1.0; - v[ i ] = TNL::cosh( u[ i ] ); - } - - // EXPECT_EQ( cosh( u ), v ) does not work here for float, maybe because - // of some fast-math optimization - for( int i = 0; i < size; i++ ) - EXPECT_NEAR( cosh( u )[ i ], v[ i ], 1.0e-6 ); -} - -TYPED_TEST( VectorTest, tanh ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u[ i ] = i - size / 2; - v[ i ] = TNL::tanh( u[ i ] ); - } - - EXPECT_EQ( tanh( u ), v ); -} - -TYPED_TEST( VectorTest, log ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u[ i ] = i + 1; - v[ i ] = TNL::log( u[ i ] ); - } - - EXPECT_EQ( log( u ), v ); -} - -TYPED_TEST( VectorTest, log10 ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u[ i ] = i + 1; - v[ i ] = TNL::log10( u[ i ] ); - } - - // EXPECT_EQ( log10( u ), v ) does not work here for float, maybe because - // of some fast-math optimization - for( int i = 0; i < size; i++ ) - EXPECT_NEAR( log10( u )[ 
i ], v[ i ], 1.0e-6 ); -} - -TYPED_TEST( VectorTest, log2 ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u[ i ] = i + 1; - v[ i ] = TNL::log2( u[ i ] ); - } - - EXPECT_EQ( log2( u ), v ); -} - -TYPED_TEST( VectorTest, exp ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u[ i ] = i - size / 2; - v[ i ] = TNL::exp( u[ i ] ); - } - - EXPECT_EQ( exp( u ), v ); -} - -TYPED_TEST( VectorTest, sign ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u[ i ] = i - size / 2; - v[ i ] = TNL::sign( u[ i ] ); - } - - EXPECT_EQ( sign( u ), v ); -} -*/ - -// TODO: test prefix sum with custom begin and end parameters - -TEST( VectorSpecialCasesTest, sumOfBoolVector ) -{ - using VectorType = Containers::Vector< bool, Devices::Host >; - using ViewType = VectorView< bool, Devices::Host >; - const float epsilon = 64 * std::numeric_limits< float >::epsilon(); - - VectorType v( 512 ), w( 512 ); - ViewType v_view( v ), w_view( w ); - v.setValue( true ); - w.setValue( false ); - - const int sum = v.sum< int >(); - const int l1norm = v.lpNorm< int >( 1.0 ); - const float l2norm = v.lpNorm< float >( 2.0 ); - const float l3norm = v.lpNorm< float >( 3.0 ); - EXPECT_EQ( sum, 512 ); - EXPECT_EQ( l1norm, 512 ); - EXPECT_NEAR( l2norm, std::sqrt( 512 ), epsilon ); - EXPECT_NEAR( l3norm, std::cbrt( 512 ), epsilon ); - - const int diff_sum = v.differenceSum< int >( w ); - const int 
diff_l1norm = v.differenceLpNorm< int >( w, 1.0 ); - const float diff_l2norm = v.differenceLpNorm< float >( w, 2.0 ); - const float diff_l3norm = v.differenceLpNorm< float >( w, 3.0 ); - EXPECT_EQ( diff_sum, 512 ); - EXPECT_EQ( diff_l1norm, 512 ); - EXPECT_NEAR( diff_l2norm, std::sqrt( 512 ), epsilon ); - EXPECT_NEAR( diff_l3norm, std::cbrt( 512 ), epsilon ); - - // test views - const int sum_view = v_view.sum< int >(); - const int l1norm_view = v_view.lpNorm< int >( 1.0 ); - const float l2norm_view = v_view.lpNorm< float >( 2.0 ); - const float l3norm_view = v_view.lpNorm< float >( 3.0 ); - EXPECT_EQ( sum_view, 512 ); - EXPECT_EQ( l1norm_view, 512 ); - EXPECT_NEAR( l2norm_view, std::sqrt( 512 ), epsilon ); - EXPECT_NEAR( l3norm_view, std::cbrt( 512 ), epsilon ); - - const int diff_sum_view = v_view.differenceSum< int >( w_view ); - const int diff_l1norm_view = v_view.differenceLpNorm< int >( w_view, 1.0 ); - const float diff_l2norm_view = v_view.differenceLpNorm< float >( w_view, 2.0 ); - const float diff_l3norm_view = v_view.differenceLpNorm< float >( w_view, 3.0 ); - EXPECT_EQ( diff_sum_view, 512 ); - EXPECT_EQ( diff_l1norm_view, 512 ); - EXPECT_NEAR( diff_l2norm_view, std::sqrt( 512 ), epsilon ); - EXPECT_NEAR( diff_l3norm_view, std::cbrt( 512 ), epsilon ); -} - -TEST( VectorSpecialCasesTest, assignmentThroughView ) -{ - using VectorType = Containers::Vector< int, Devices::Host >; - using ViewType = VectorView< int, Devices::Host >; - - static_assert( Algorithms::Details::HasSubscriptOperator< VectorType >::value, "Subscript operator detection by SFINAE does not work for Vector." ); - static_assert( Algorithms::Details::HasSubscriptOperator< ViewType >::value, "Subscript operator detection by SFINAE does not work for VectorView." 
); - - VectorType u( 100 ), v( 100 ); - ViewType u_view( u ), v_view( v ); - - u.setValue( 42 ); - v.setValue( 0 ); - v_view = u_view; - EXPECT_EQ( u_view.getData(), u.getData() ); - EXPECT_EQ( v_view.getData(), v.getData() ); - for( int i = 0; i < 100; i++ ) - EXPECT_EQ( v_view[ i ], 42 ); - - u.setValue( 42 ); - v.setValue( 0 ); - v_view = u; - EXPECT_EQ( u_view.getData(), u.getData() ); - EXPECT_EQ( v_view.getData(), v.getData() ); - for( int i = 0; i < 100; i++ ) - EXPECT_EQ( v_view[ i ], 42 ); -} - -TEST( VectorSpecialCasesTest, operationsOnConstView ) -{ - using VectorType = Containers::Vector< int, Devices::Host >; - using ViewType = VectorView< const int, Devices::Host >; - - VectorType u( 100 ), v( 100 ); - ViewType u_view( u ), v_view( v ); - - u.setValue( 1 ); - v.setValue( 1 ); - - EXPECT_EQ( u_view.max(), 1 ); - EXPECT_EQ( u_view.min(), 1 ); - EXPECT_EQ( u_view.absMax(), 1 ); - EXPECT_EQ( u_view.absMin(), 1 ); - EXPECT_EQ( u_view.lpNorm( 1 ), 100 ); - EXPECT_EQ( u_view.differenceMax( v_view ), 0 ); - EXPECT_EQ( u_view.differenceMin( v_view ), 0 ); - EXPECT_EQ( u_view.differenceAbsMax( v_view ), 0 ); - EXPECT_EQ( u_view.differenceAbsMin( v_view ), 0 ); - EXPECT_EQ( u_view.differenceLpNorm( v_view, 1 ), 0 ); - EXPECT_EQ( u_view.differenceSum( v_view ), 0 ); - EXPECT_EQ( u_view.scalarProduct( v_view ), 100 ); -} - -TEST( VectorSpecialCasesTest, initializationOfVectorViewByArrayView ) -{ - using ArrayType = Containers::Array< int, Devices::Host >; - using VectorViewType = VectorView< const int, Devices::Host >; - using ArrayViewType = ArrayView< int, Devices::Host >; - - ArrayType a( 100 ); - a.setValue( 0 ); - ArrayViewType a_view( a ); - - VectorViewType v_view( a_view ); - EXPECT_EQ( v_view.getData(), a_view.getData() ); - EXPECT_EQ( v_view.sum(), 0 ); -} - -TEST( VectorSpecialCasesTest, defaultConstructors ) -{ - using ArrayType = Containers::Array< int, Devices::Host >; - using VectorViewType = VectorView< int, Devices::Host >; - using ArrayViewType = 
ArrayView< int, Devices::Host >; - - ArrayType a( 100 ); - a.setValue( 0 ); - - ArrayViewType a_view; - a_view.bind( a ); - - VectorViewType v_view; - v_view.bind( a ); - EXPECT_EQ( v_view.getData(), a_view.getData() ); -} - - - - - -#endif // HAVE_GTEST - - -#include "../GtestMissingError.h" -int main( int argc, char* argv[] ) -{ - //Test(); - //return 0; -#ifdef HAVE_GTEST - ::testing::InitGoogleTest( &argc, argv ); - return RUN_ALL_TESTS(); -#else - throw GtestMissingError(); -#endif -} diff --git a/src/UnitTests/Containers/VectorTestSetup.h b/src/UnitTests/Containers/VectorTestSetup.h new file mode 100644 index 000000000..870db0f25 --- /dev/null +++ b/src/UnitTests/Containers/VectorTestSetup.h @@ -0,0 +1,143 @@ +/*************************************************************************** + VectorTestSetup.h - description + ------------------- + begin : Apr 30, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +// NOTE: Vector = Array + VectorOperations, so we test Vector and VectorOperations at the same time + +#pragma once + +#ifdef HAVE_GTEST +#include + +#include +#include +#include + +#include "gtest/gtest.h" + +using namespace TNL; +using namespace TNL::Containers; +using namespace TNL::Containers::Algorithms; +using namespace TNL::Arithmetics; + +template< typename Vector > +void setLinearSequence( Vector& deviceVector ) +{ + typename Vector::HostType a; + a.setLike( deviceVector ); + for( int i = 0; i < a.getSize(); i++ ) + a[ i ] = i; + deviceVector = a; +} + +template< typename Vector > +void setConstantSequence( Vector& deviceVector, + typename Vector::RealType v ) +{ + deviceVector.setValue( v ); +} + +template< typename Vector > +void setNegativeLinearSequence( Vector& deviceVector ) +{ + typename Vector::HostType a; + a.setLike( deviceVector ); + for( int i = 0; i < a.getSize(); i++ ) + 
a[ i ] = -i; + deviceVector = a; +} + +template< typename Vector > +void setOscilatingSequence( Vector& deviceVector, + typename Vector::RealType v ) +{ + typename Vector::HostType a; + a.setLike( deviceVector ); + a[ 0 ] = v; + for( int i = 1; i < a.getSize(); i++ ) + a[ i ] = a[ i-1 ] * -1; + deviceVector = a; +} + + +// TODO: test everything with OpenMP with different number of threads + +// test fixture for typed tests +template< typename Vector > +class VectorTest : public ::testing::Test +{ +protected: + using VectorType = Vector; + using VectorOperations = Algorithms::VectorOperations< typename VectorType::DeviceType >; + using ViewType = VectorView< typename Vector::RealType, typename Vector::DeviceType, typename Vector::IndexType >; +}; + +// types for which VectorTest is instantiated +// TODO: Quad must be fixed +using VectorTypes = ::testing::Types< +#ifndef HAVE_CUDA + Vector< int, Devices::Host, short >, + Vector< long, Devices::Host, short >, + Vector< float, Devices::Host, short >, + Vector< double, Devices::Host, short >, + //Vector< Quad< float >, Devices::Host, short >, + //Vector< Quad< double >, Devices::Host, short >, + Vector< int, Devices::Host, int >, + Vector< long, Devices::Host, int >, + Vector< float, Devices::Host, int >, + Vector< double, Devices::Host, int >, + //Vector< Quad< float >, Devices::Host, int >, + //Vector< Quad< double >, Devices::Host, int >, + Vector< int, Devices::Host, long >, + Vector< long, Devices::Host, long >, + Vector< float, Devices::Host, long >, + Vector< double, Devices::Host, long > + //Vector< Quad< float >, Devices::Host, long >, + //Vector< Quad< double >, Devices::Host, long > +#endif +#ifdef HAVE_CUDA + Vector< int, Devices::Cuda, short >, + Vector< long, Devices::Cuda, short >, + Vector< float, Devices::Cuda, short >, + Vector< double, Devices::Cuda, short >, + //Vector< Quad< float >, Devices::Cuda, short >, + //Vector< Quad< double >, Devices::Cuda, short >, + Vector< int, Devices::Cuda, int >, + 
Vector< long, Devices::Cuda, int >, + Vector< float, Devices::Cuda, int >, + Vector< double, Devices::Cuda, int >, + //Vector< Quad< float >, Devices::Cuda, int >, + //Vector< Quad< double >, Devices::Cuda, int >, + Vector< int, Devices::Cuda, long >, + Vector< long, Devices::Cuda, long >, + Vector< float, Devices::Cuda, long >, + Vector< double, Devices::Cuda, long > + //Vector< Quad< float >, Devices::Cuda, long >, + //Vector< Quad< double >, Devices::Cuda, long > +#endif +#ifdef HAVE_MIC + , + Vector< int, Devices::MIC, short >, + Vector< long, Devices::MIC, short >, + Vector< float, Devices::MIC, short >, + Vector< double, Devices::MIC, short >, + Vector< int, Devices::MIC, int >, + Vector< long, Devices::MIC, int >, + Vector< float, Devices::MIC, int >, + Vector< double, Devices::MIC, int >, + Vector< int, Devices::MIC, long >, + Vector< long, Devices::MIC, long >, + Vector< float, Devices::MIC, long >, + Vector< double, Devices::MIC, long > +#endif +>; + +TYPED_TEST_SUITE( VectorTest, VectorTypes ); + +#endif \ No newline at end of file -- GitLab From c3ee69fc1f5d406c205f359522767f57b2862a89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Wed, 1 May 2019 22:08:19 +0200 Subject: [PATCH 23/93] Splitting static and non-static expression templates. 
--- src/TNL/Containers/Expressions/Comparison.h | 2 +- .../Expressions/ExpressionTemplates.h | 340 +--- src/TNL/Containers/Expressions/IsStatic.h | 33 + .../Expressions/StaticExpressionTemplates.h | 1697 +++++++++++++++++ src/TNL/Containers/StaticVector.h | 18 +- src/TNL/Containers/StaticVector1D_impl.h | 10 +- src/TNL/Containers/StaticVector2D_impl.h | 10 +- src/TNL/Containers/StaticVector3D_impl.h | 10 +- src/TNL/Containers/StaticVectorExpressions.h | 164 +- src/TNL/Containers/StaticVector_impl.h | 10 +- src/TNL/Containers/VectorView.h | 2 + src/UnitTests/Containers/VectorTest-1.h | 16 + 12 files changed, 1915 insertions(+), 397 deletions(-) create mode 100644 src/TNL/Containers/Expressions/IsStatic.h create mode 100644 src/TNL/Containers/Expressions/StaticExpressionTemplates.h diff --git a/src/TNL/Containers/Expressions/Comparison.h b/src/TNL/Containers/Expressions/Comparison.h index 7265adb79..a39c82a4a 100644 --- a/src/TNL/Containers/Expressions/Comparison.h +++ b/src/TNL/Containers/Expressions/Comparison.h @@ -1,5 +1,5 @@ /*************************************************************************** - StaticComparison.h - description + Comparison.h - description ------------------- begin : Apr 19, 2019 copyright : (C) 2019 by Tomas Oberhuber diff --git a/src/TNL/Containers/Expressions/ExpressionTemplates.h b/src/TNL/Containers/Expressions/ExpressionTemplates.h index 537a74e6b..194b707e4 100644 --- a/src/TNL/Containers/Expressions/ExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/ExpressionTemplates.h @@ -14,190 +14,37 @@ #include #include #include +#include namespace TNL { namespace Containers { namespace Expressions { -template< typename T > -struct IsStaticType -{ - static constexpr bool value = false; -}; - +//// +// Non-static unary expression template template< typename T1, template< typename > class Operation, typename Parameter = void, - ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value, - bool StaticET = IsStaticType< T1 
>::value > + ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value > struct UnaryExpressionTemplate { }; +//// +// Non-static binary expression template template< typename T1, typename T2, template< typename, typename > class Operation, ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value, - ExpressionVariableType T2Type = ExpressionVariableTypeGetter< T2 >::value, - bool StaticET = IsStaticType< T1 >::value || IsStaticType< T2 >::value > + ExpressionVariableType T2Type = ExpressionVariableTypeGetter< T2 >::value > struct BinaryExpressionTemplate { }; -template< int Size, - typename Real > -struct IsStaticType< StaticVector< Size, Real > > -{ - static constexpr bool value = true; -}; - -template< typename T1, - template< typename > class Operation, - typename Parameter > -struct IsStaticType< UnaryExpressionTemplate< T1, Operation, Parameter > > -{ - static constexpr bool value = UnaryExpressionTemplate< T1, Operation, Parameter >::isStatic(); -}; - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -struct IsStaticType< BinaryExpressionTemplate< T1, T2, Operation > > -{ - static constexpr bool value = BinaryExpressionTemplate< T1, T2, Operation >::isStatic(); -}; - - -//// -// Static binary expression template -template< typename T1, - typename T2, - template< typename, typename > class Operation > -struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariable, true > -{ - using RealType = typename T1::RealType; - using IsExpressionTemplate = bool; - static_assert( IsStaticType< T1 >::value == IsStaticType< T2 >::value, "Attempt to mix static and non-static operands in binary expression templates" ); - static constexpr bool isStatic() { return true; } - - __cuda_callable__ - BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} - - __cuda_callable__ - static BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) - { - return 
BinaryExpressionTemplate( a, b ); - } - - RealType getElement( const int i ) const - { - return Operation< typename T1::RealType, typename T2::RealType >::evaluate( op1[ i ], op2[ i ] ); - } - - __cuda_callable__ - RealType operator[]( const int i ) const - { - return Operation< typename T1::RealType, typename T2::RealType >::evaluate( op1[ i ], op2[ i ] ); - } - - __cuda_callable__ - int getSize() const - { - return op1.getSize(); - } - - protected: - const T1 &op1; - const T2 &op2; -}; - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVariable, true > -{ - using RealType = typename T1::RealType; - using IsExpressionTemplate = bool; - static constexpr bool isStatic() { return true; } - - __cuda_callable__ - BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} - - __cuda_callable__ - BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) - { - return BinaryExpressionTemplate( a, b ); - } - - RealType getElement( const int i ) const - { - return Operation< typename T1::RealType, T2 >::evaluate( op1[ i ], op2 ); - } - - __cuda_callable__ - RealType operator[]( const int i ) const - { - return Operation< typename T1::RealType, T2 >::evaluate( op1[ i ], op2 ); - } - - __cuda_callable__ - int getSize() const - { - return op1.getSize(); - } - - protected: - const T1 &op1; - const T2 &op2; - -}; - -template< typename T1, - typename T2, - template< typename, typename > class Operation > -struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVariable, true > -{ - using RealType = typename T2::RealType; - using IsExpressionTemplate = bool; - static constexpr bool isStatic() { return true; } - - __cuda_callable__ - BinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} - - __cuda_callable__ - BinaryExpressionTemplate evaluate( const T1& a, const T2& b ) - { - return 
BinaryExpressionTemplate( a, b ); - } - - RealType getElement( const int i ) const - { - return Operation< T1, typename T2::RealType >::evaluate( op1, op2[ i ] ); - } - - __cuda_callable__ - RealType operator[]( const int i ) const - { - return Operation< T1, typename T2::RealType >::evaluate( op1, op2[ i ] ); - } - - __cuda_callable__ - int getSize() const - { - return op2.getSize(); - } - - protected: - const T1& op1; - const T2& op2; -}; - -//// -// Non-static binary expression template template< typename T1, typename T2, template< typename, typename > class Operation > -struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariable, false > +struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariable > { using RealType = typename T1::RealType; using DeviceType = typename T1::DeviceType; @@ -240,7 +87,7 @@ struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariab template< typename T1, typename T2, template< typename, typename > class Operation > -struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVariable, false > +struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVariable > { using RealType = typename T1::RealType; using DeviceType = typename T1::DeviceType; @@ -274,14 +121,16 @@ struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVa } protected: - typename OperandType< T1, DeviceType >::type op1; - typename OperandType< T2, DeviceType >::type op2; + const T1 op1; + const T2 op2; + //typename OperandType< T1, DeviceType >::type op1; + //typename OperandType< T2, DeviceType >::type op2; }; template< typename T1, typename T2, template< typename, typename > class Operation > -struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVariable, false > +struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVariable > { using RealType = typename T2::RealType; using DeviceType = 
typename T2::DeviceType; @@ -321,96 +170,6 @@ struct BinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVa //typename OperandType< T2, DeviceType >::type op2; }; -//// -// Static unary expression template -// -// Parameter type serves mainly for pow( base, exp ). Here exp is parameter we need -// to pass to pow. -template< typename T1, - template< typename > class Operation, - typename Parameter > -struct UnaryExpressionTemplate< T1, Operation, Parameter, VectorVariable, true > -{ - using RealType = typename T1::RealType; - using IsExpressionTemplate = bool; - static constexpr bool isStatic() { return true; } - - __cuda_callable__ - UnaryExpressionTemplate( const T1& a, const Parameter& p ) - : operand( a ), parameter( p ) {} - - __cuda_callable__ - static UnaryExpressionTemplate evaluate( const T1& a ) - { - return UnaryExpressionTemplate( a ); - } - - RealType getElement( const int i ) const - { - return Operation< typename T1::RealType >::evaluate( operand[ i ], parameter ); - } - - __cuda_callable__ - RealType operator[]( const int i ) const - { - return Operation< typename T1::RealType >::evaluate( operand[ i ], parameter ); - } - - __cuda_callable__ - int getSize() const - { - return operand.getSize(); - } - - void set( const Parameter& p ) { parameter = p; } - - const Parameter& get() { return parameter; } - - protected: - const T1& operand; - Parameter parameter; -}; - -//// -// Static unary expression template with no parameter -template< typename T1, - template< typename > class Operation > -struct UnaryExpressionTemplate< T1, Operation, void, VectorVariable, true > -{ - using RealType = typename T1::RealType; - using IsExpressionTemplate = bool; - static constexpr bool isStatic() { return true; } - - __cuda_callable__ - UnaryExpressionTemplate( const T1& a ): operand( a ){} - - __cuda_callable__ - static UnaryExpressionTemplate evaluate( const T1& a ) - { - return UnaryExpressionTemplate( a ); - } - - RealType getElement( const int i 
) const - { - return Operation< typename T1::RealType >::evaluate( operand[ i ] ); - } - - __cuda_callable__ - RealType operator[]( const int i ) const - { - return Operation< typename T1::RealType >::evaluate( operand[ i ] ); - } - - __cuda_callable__ - int getSize() const - { - return operand.getSize(); - } - - protected: - const T1& operand; -}; - //// // Non-static unary expression template // @@ -419,7 +178,7 @@ struct UnaryExpressionTemplate< T1, Operation, void, VectorVariable, true > template< typename T1, template< typename > class Operation, typename Parameter > -struct UnaryExpressionTemplate< T1, Operation, Parameter, VectorVariable, false > +struct UnaryExpressionTemplate< T1, Operation, Parameter, VectorVariable > { using RealType = typename T1::RealType; using DeviceType = typename T1::DeviceType; @@ -466,7 +225,7 @@ struct UnaryExpressionTemplate< T1, Operation, Parameter, VectorVariable, false // Non-static unary expression template with no parameter template< typename T1, template< typename > class Operation > -struct UnaryExpressionTemplate< T1, Operation, void, VectorVariable, false > +struct UnaryExpressionTemplate< T1, Operation, void, VectorVariable > { using RealType = typename T1::RealType; using DeviceType = typename T1::DeviceType; @@ -978,7 +737,7 @@ bool operator == ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonEQ( a, b ); + return Expressions::ComparisonEQ( a, b ); } template< typename T1, @@ -989,7 +748,7 @@ bool operator == ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::StaticComparisonEQ( a, b ); + return Expressions::ComparisonEQ( a, b ); } template< typename L1, @@ -1002,7 +761,7 @@ bool operator == ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& 
a, const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonEQ( a, b ); + return Expressions::ComparisonEQ( a, b ); } template< typename L1, @@ -1015,7 +774,7 @@ bool operator == ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const typename Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) { - return Expressions::StaticComparisonEQ( a, b ); + return Expressions::ComparisonEQ( a, b ); } //// @@ -1031,7 +790,7 @@ bool operator != ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonNE( a, b ); + return Expressions::ComparisonNE( a, b ); } template< typename T1, @@ -1042,7 +801,7 @@ bool operator != ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::StaticComparisonNE( a, b ); + return Expressions::ComparisonNE( a, b ); } template< typename L1, @@ -1055,7 +814,7 @@ bool operator != ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonNE( a, b ); + return Expressions::ComparisonNE( a, b ); } template< typename L1, @@ -1068,7 +827,7 @@ bool operator != ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const typename Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) { - return Expressions::StaticComparisonNE( a, b ); + return Expressions::ComparisonNE( a, b ); } //// @@ -1084,7 +843,7 @@ bool operator < ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonLT( a, b ); + return Expressions::ComparisonLT( a, b ); } template< 
typename T1, @@ -1095,7 +854,7 @@ bool operator < ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::StaticComparisonLT( a, b ); + return Expressions::ComparisonLT( a, b ); } template< typename L1, @@ -1108,7 +867,7 @@ bool operator < ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonLT( a, b ); + return Expressions::ComparisonLT( a, b ); } template< typename L1, @@ -1121,7 +880,7 @@ bool operator < ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const typename Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) { - return Expressions::StaticComparisonLT( a, b ); + return Expressions::ComparisonLT( a, b ); } //// @@ -1137,7 +896,7 @@ bool operator <= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonLE( a, b ); + return Expressions::ComparisonLE( a, b ); } template< typename T1, @@ -1148,7 +907,7 @@ bool operator <= ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::StaticComparisonLE( a, b ); + return Expressions::ComparisonLE( a, b ); } template< typename L1, @@ -1161,7 +920,7 @@ bool operator <= ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonLE( a, b ); + return Expressions::ComparisonLE( a, b ); } template< typename L1, @@ -1174,7 +933,7 @@ bool operator <= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const typename 
Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) { - return Expressions::StaticComparisonLE( a, b ); + return Expressions::ComparisonLE( a, b ); } //// @@ -1190,7 +949,7 @@ bool operator > ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonGT( a, b ); + return Expressions::ComparisonGT( a, b ); } template< typename T1, @@ -1201,7 +960,7 @@ bool operator > ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::StaticComparisonGT( a, b ); + return Expressions::ComparisonGT( a, b ); } template< typename L1, @@ -1214,7 +973,7 @@ bool operator > ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonGT( a, b ); + return Expressions::ComparisonGT( a, b ); } template< typename L1, @@ -1227,7 +986,7 @@ bool operator > ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const typename Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) { - return Expressions::StaticComparisonGT( a, b ); + return Expressions::ComparisonGT( a, b ); } //// @@ -1243,7 +1002,7 @@ bool operator >= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonGE( a, b ); + return Expressions::ComparisonGE( a, b ); } template< typename T1, @@ -1254,7 +1013,7 @@ bool operator >= ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::StaticComparisonGE( a, b ); + return Expressions::ComparisonGE( a, b ); } template< typename L1, @@ 
-1267,7 +1026,7 @@ bool operator >= ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonGE( a, b ); + return Expressions::ComparisonGE( a, b ); } template< typename L1, @@ -1280,7 +1039,7 @@ bool operator >= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const typename Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) { - return Expressions::StaticComparisonGE( a, b ); + return Expressions::ComparisonGE( a, b ); } //// @@ -1873,6 +1632,25 @@ exp( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) Expressions::Exp >( a ); } +//// +// Vertical operations - min +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +typename Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType +min( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ +/* using ExpressionType = Expressions::BinaryExpressionTemplate< L1, L2, LOperation >; + using RealType = typename ExpressionType::RealType; + using IndexType = typename ExpressionType::IndexType; + + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; + auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::min( a, b ); }; + auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::min( a, b ); }; + return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() );*/ +} + + //// // Output stream template< typename T1, diff --git a/src/TNL/Containers/Expressions/IsStatic.h b/src/TNL/Containers/Expressions/IsStatic.h new file mode 100644 index 000000000..b78e42d64 --- /dev/null +++ b/src/TNL/Containers/Expressions/IsStatic.h @@ -0,0 +1,33 @@ 
+/*************************************************************************** + IsStatic.h - description + ------------------- + begin : May 1, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace Containers { + namespace Expressions { + + +template< typename T > +struct IsStaticType +{ + static constexpr bool value = false; +}; + +template< int Size, + typename Real > +struct IsStaticType< StaticVector< Size, Real > > +{ + static constexpr bool value = true; +}; + + } //namespace Expressions + } //namespace Containers +} //namespace TNL \ No newline at end of file diff --git a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h new file mode 100644 index 000000000..2d936dc9a --- /dev/null +++ b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h @@ -0,0 +1,1697 @@ +/*************************************************************************** + StaticExpressionTemplates.h - description + ------------------- + begin : Apr 18, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace TNL { + namespace Containers { + namespace Expressions { + + +template< typename T1, + template< typename > class Operation, + typename Parameter = void, + ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value > +struct StaticUnaryExpressionTemplate +{ +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation, + ExpressionVariableType T1Type = ExpressionVariableTypeGetter< T1 >::value, + ExpressionVariableType T2Type = 
ExpressionVariableTypeGetter< T2 >::value > +struct StaticBinaryExpressionTemplate +{ +}; + + +template< typename T1, + template< typename > class Operation, + typename Parameter > +struct IsStaticType< StaticUnaryExpressionTemplate< T1, Operation, Parameter > > +{ + static constexpr bool value = StaticUnaryExpressionTemplate< T1, Operation, Parameter >::isStatic(); +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct IsStaticType< StaticBinaryExpressionTemplate< T1, T2, Operation > > +{ + static constexpr bool value = StaticBinaryExpressionTemplate< T1, T2, Operation >::isStatic(); +}; + + +//// +// Static binary expression template +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct StaticBinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariable > +{ + static_assert( IsStaticType< T1 >::value, "Left-hand side operand of static expression is not static, i.e. based on static vector." ); + static_assert( IsStaticType< T2 >::value, "Right-hand side operand of static expression is not static, i.e. based on static vector." 
); + using RealType = typename T1::RealType; + using IsExpressionTemplate = bool; + static_assert( IsStaticType< T1 >::value == IsStaticType< T2 >::value, "Attempt to mix static and non-static operands in binary expression templates" ); + static constexpr bool isStatic() { return true; } + + __cuda_callable__ + StaticBinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + + __cuda_callable__ + static StaticBinaryExpressionTemplate evaluate( const T1& a, const T2& b ) + { + return StaticBinaryExpressionTemplate( a, b ); + } + + RealType getElement( const int i ) const + { + return Operation< typename T1::RealType, typename T2::RealType >::evaluate( op1[ i ], op2[ i ] ); + } + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return Operation< typename T1::RealType, typename T2::RealType >::evaluate( op1[ i ], op2[ i ] ); + } + + __cuda_callable__ + int getSize() const + { + return op1.getSize(); + } + + protected: + const T1 &op1; + const T2 &op2; +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct StaticBinaryExpressionTemplate< T1, T2, Operation, VectorVariable, ArithmeticVariable > +{ + static_assert( IsStaticType< T1 >::value, "Left-hand side operand of static expression is not static, i.e. based on static vector." 
); + + using RealType = typename T1::RealType; + using IsExpressionTemplate = bool; + static constexpr bool isStatic() { return true; } + + __cuda_callable__ + StaticBinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + + __cuda_callable__ + StaticBinaryExpressionTemplate evaluate( const T1& a, const T2& b ) + { + return StaticBinaryExpressionTemplate( a, b ); + } + + RealType getElement( const int i ) const + { + return Operation< typename T1::RealType, T2 >::evaluate( op1[ i ], op2 ); + } + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return Operation< typename T1::RealType, T2 >::evaluate( op1[ i ], op2 ); + } + + __cuda_callable__ + int getSize() const + { + return op1.getSize(); + } + + protected: + const T1 &op1; + const T2 &op2; + +}; + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +struct StaticBinaryExpressionTemplate< T1, T2, Operation, ArithmeticVariable, VectorVariable > +{ + static_assert( IsStaticType< T2 >::value, "Right-hand side operand of static expression is not static, i.e. based on static vector." 
); + + using RealType = typename T2::RealType; + using IsExpressionTemplate = bool; + static constexpr bool isStatic() { return true; } + + __cuda_callable__ + StaticBinaryExpressionTemplate( const T1& a, const T2& b ): op1( a ), op2( b ){} + + __cuda_callable__ + StaticBinaryExpressionTemplate evaluate( const T1& a, const T2& b ) + { + return StaticBinaryExpressionTemplate( a, b ); + } + + RealType getElement( const int i ) const + { + return Operation< T1, typename T2::RealType >::evaluate( op1, op2[ i ] ); + } + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return Operation< T1, typename T2::RealType >::evaluate( op1, op2[ i ] ); + } + + __cuda_callable__ + int getSize() const + { + return op2.getSize(); + } + + protected: + const T1& op1; + const T2& op2; +}; + +//// +// Static unary expression template +// +// Parameter type serves mainly for pow( base, exp ). Here exp is parameter we need +// to pass to pow. +template< typename T1, + template< typename > class Operation, + typename Parameter > +struct StaticUnaryExpressionTemplate< T1, Operation, Parameter, VectorVariable > +{ + static_assert( IsStaticType< T1 >::value, "Operand of static expression is not static, i.e. based on static vector." 
); + + using RealType = typename T1::RealType; + using IsExpressionTemplate = bool; + static constexpr bool isStatic() { return true; } + + __cuda_callable__ + StaticUnaryExpressionTemplate( const T1& a, const Parameter& p ) + : operand( a ), parameter( p ) {} + + __cuda_callable__ + static StaticUnaryExpressionTemplate evaluate( const T1& a ) + { + return StaticUnaryExpressionTemplate( a ); + } + + RealType getElement( const int i ) const + { + return Operation< typename T1::RealType >::evaluate( operand[ i ], parameter ); + } + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return Operation< typename T1::RealType >::evaluate( operand[ i ], parameter ); + } + + __cuda_callable__ + int getSize() const + { + return operand.getSize(); + } + + void set( const Parameter& p ) { parameter = p; } + + const Parameter& get() { return parameter; } + + protected: + const T1& operand; + Parameter parameter; +}; + +//// +// Static unary expression template with no parameter +template< typename T1, + template< typename > class Operation > +struct StaticUnaryExpressionTemplate< T1, Operation, void, VectorVariable > +{ + using RealType = typename T1::RealType; + using IsExpressionTemplate = bool; + static constexpr bool isStatic() { return true; } + + __cuda_callable__ + StaticUnaryExpressionTemplate( const T1& a ): operand( a ){} + + __cuda_callable__ + static StaticUnaryExpressionTemplate evaluate( const T1& a ) + { + return StaticUnaryExpressionTemplate( a ); + } + + RealType getElement( const int i ) const + { + return Operation< typename T1::RealType >::evaluate( operand[ i ] ); + } + + __cuda_callable__ + RealType operator[]( const int i ) const + { + return Operation< typename T1::RealType >::evaluate( operand[ i ] ); + } + + __cuda_callable__ + int getSize() const + { + return operand.getSize(); + } + + protected: + const T1& operand; +}; + +//// +// Binary expressions addition +template< typename L1, + typename L2, + template< typename, typename > 
class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Addition > +operator + ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + /* The node lists the left operand type first, exactly as the declared return type does, because a and b are passed to the constructor in that order. */ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Addition >( a, b ); +} + +/** Addition of a binary expression a and a value b of the expression's RealType; returns the Addition node built from ( a, b ). */ +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Addition > +operator + ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Addition >( a, b ); +} + +/** Addition of a unary expression a (left) and a binary expression b (right). */ +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Addition > +operator + ( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const 
typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Addition >( a, b ); +} + +/** Addition of a binary expression a (left) and a unary expression b (right). */ +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Expressions::Addition > +operator + ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) +{ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Expressions::Addition >( a, b ); +} + +//// +// Binary expression subtraction +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Subtraction > +operator - ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + /* The node lists the left operand type first, exactly as the declared return type does, because a and b are passed to the constructor in that order. */ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Subtraction >( a, b ); +} + +template< typename T1, + 
typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Subtraction > +operator - ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + /* Subtraction of a value b of the expression's RealType from the binary expression a. */ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Subtraction >( a, b ); +} + +/** Subtraction of a binary expression b (right) from a unary expression a (left); returns the Subtraction node built from ( a, b ). */ +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Subtraction > +operator - ( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Subtraction >( a, b ); +} + +/** Subtraction of a unary expression b (right) from a binary expression a (left). */ +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Expressions::Subtraction > +operator - ( const 
Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) +{ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Expressions::Subtraction >( a, b ); +} + +//// +// Binary expression multiplication +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Multiplication > +operator * ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + /* The node lists the left operand type first, exactly as the declared return type does, because a and b are passed to the constructor in that order. */ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Multiplication >( a, b ); +} + +/** Multiplication of a binary expression a by a value b of the expression's RealType. */ +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Multiplication > +operator * ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation 
>::RealType, + Expressions::Multiplication >( a, b ); +} + +/** Multiplication of a unary expression a (left) and a binary expression b (right); returns the Multiplication node built from ( a, b ). */ +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Multiplication > +operator * ( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Multiplication >( a, b ); +} + +/** Multiplication of a binary expression a (left) and a unary expression b (right). */ +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Expressions::Multiplication > +operator * ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >& b ) +{ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Expressions::Multiplication >( a, b ); +} + +//// +// Binary expression division +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + 
Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Division > +operator / ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + /* The node lists the left operand type first, exactly as the declared return type does, because a and b are passed to the constructor in that order. */ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Division >( a, b ); +} + +/** Division of a binary expression a by a value b of the expression's RealType. */ +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Division > +operator / ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Division >( a, b ); +} + +/** Division of a unary expression a (left) by a binary expression b (right). */ +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Division > +operator / ( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticBinaryExpressionTemplate< + 
Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Division >( a, b ); +} + +/** Division of a binary expression a (left) by a unary expression b (right). */ +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Expressions::Division > +operator / ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) +{ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Expressions::Division >( a, b ); +} + +//// +// Binary expression min +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Min > +min ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + /* The node lists the left operand type first, exactly as the declared return type does, because a and b are passed to the constructor in that order. */ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Min >( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + 
Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Min > +min( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + /* Min of the binary expression a and a value b of the expression's RealType. */ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Min >( a, b ); +} + +/** Min of a unary expression a (left) and a binary expression b (right); returns the Min node built from ( a, b ). */ +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Min > +min( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Min >( a, b ); +} + +/** Min of a binary expression a (left) and a unary expression b (right). */ +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Expressions::Min > +min( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) +{ + return Expressions::StaticBinaryExpressionTemplate< + 
Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Expressions::Min >( a, b ); +} + +//// +// Binary expression max +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Max > +max( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + /* The node lists the left operand type first, exactly as the declared return type does, because a and b are passed to the constructor in that order. */ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Max >( a, b ); +} + +/** Max of a binary expression a and a value b of the expression's RealType. */ +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Max > +max( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Expressions::Max >( a, b ); +} + +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + 
Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Max > +max( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + /* Max of the unary expression a (left) and the binary expression b (right). */ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Expressions::Max >( a, b ); +} + +/** Max of a binary expression a (left) and a unary expression b (right); returns the Max node built from ( a, b ). */ +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +const Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Expressions::Max > +max( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) +{ + return Expressions::StaticBinaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Expressions::Max >( a, b ); +} + +//// +// Comparison operator == +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator == ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + /* Unlike the arithmetic operators, the comparison returns a plain bool obtained from Expressions::StaticComparisonEQ. */ + return Expressions::StaticComparisonEQ( a, b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +bool +operator == ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& 
a, + const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + /* Binary expression compared with a value of its RealType. */ + return Expressions::StaticComparisonEQ( a, b ); +} + +/** Equality of a unary expression (left) and a binary expression (right); forwards to Expressions::StaticComparisonEQ. */ +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator == ( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonEQ( a, b ); +} + +/** Equality of a binary expression (left) and a unary expression (right); forwards to Expressions::StaticComparisonEQ. */ +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +bool +operator == ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) +{ + return Expressions::StaticComparisonEQ( a, b ); +} + +//// +// Comparison operator != +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator != ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + /* The bool result comes from Expressions::StaticComparisonNE. */ + return Expressions::StaticComparisonNE( a, b ); +} + +/** Inequality of a binary expression and a value of its RealType; forwards to Expressions::StaticComparisonNE. */ +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +bool +operator != ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticComparisonNE( a, b ); +} + +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator != ( const 
Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + /* Unary expression (left) compared with a binary expression (right). */ + return Expressions::StaticComparisonNE( a, b ); +} + +/** Inequality of a binary expression (left) and a unary expression (right); forwards to Expressions::StaticComparisonNE. */ +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +bool +operator != ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >& b ) +{ + return Expressions::StaticComparisonNE( a, b ); +} + +//// +// Comparison operator < +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator < ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + /* The bool result comes from Expressions::StaticComparisonLT. */ + return Expressions::StaticComparisonLT( a, b ); +} + +/** Less-than of a binary expression and a value of its RealType; forwards to Expressions::StaticComparisonLT. */ +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +bool +operator < ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticComparisonLT( a, b ); +} + +/** Less-than of a unary expression (left) and a binary expression (right); forwards to Expressions::StaticComparisonLT. */ +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator < ( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonLT( a, b ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ 
+bool +operator < ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >& b ) +{ + /* Binary expression (left) compared with a unary expression (right). */ + return Expressions::StaticComparisonLT( a, b ); +} + +//// +// Comparison operator <= +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator <= ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + /* The bool result comes from Expressions::StaticComparisonLE. */ + return Expressions::StaticComparisonLE( a, b ); +} + +/** Less-or-equal of a binary expression and a value of its RealType; forwards to Expressions::StaticComparisonLE. */ +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +bool +operator <= ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticComparisonLE( a, b ); +} + +/** Less-or-equal of a unary expression (left) and a binary expression (right); forwards to Expressions::StaticComparisonLE. */ +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator <= ( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonLE( a, b ); +} + +/** Less-or-equal of a binary expression (left) and a unary expression (right); forwards to Expressions::StaticComparisonLE. */ +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +bool +operator <= ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >& b ) +{ + return Expressions::StaticComparisonLE( a, b ); +} + +//// +// Comparison operator > +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, 
+ typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator > ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + /* The bool result comes from Expressions::StaticComparisonGT. */ + return Expressions::StaticComparisonGT( a, b ); +} + +/** Greater-than of a binary expression and a value of its RealType; forwards to Expressions::StaticComparisonGT. */ +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +bool +operator > ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticComparisonGT( a, b ); +} + +/** Greater-than of a unary expression (left) and a binary expression (right); forwards to Expressions::StaticComparisonGT. */ +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator > ( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonGT( a, b ); +} + +/** Greater-than of a binary expression (left) and a unary expression (right); forwards to Expressions::StaticComparisonGT. */ +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +bool +operator > ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >& b ) +{ + return Expressions::StaticComparisonGT( a, b ); +} + +//// +// Comparison operator >= +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator >= ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + /* The bool result comes from Expressions::StaticComparisonGE. */ + return Expressions::StaticComparisonGE( a, b ); +} + +template< typename T1, + typename T2, + template< 
typename, typename > class Operation > +__cuda_callable__ +bool +operator >= ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +{ + return Expressions::StaticComparisonGE( a, b ); +} + +/** Greater-or-equal of a unary expression (left) and a binary expression (right); forwards to Expressions::StaticComparisonGE. */ +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +bool +operator >= ( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +{ + return Expressions::StaticComparisonGE( a, b ); +} + +/** Greater-or-equal of a binary expression (left) and a unary expression (right); forwards to Expressions::StaticComparisonGE. */ +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +bool +operator >= ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >& b ) +{ + return Expressions::StaticComparisonGE( a, b ); +} + +//// +// Unary operations + + +//// +// Minus +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Minus > +operator -( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Minus >( a ); +} + +/** Unary minus of a unary expression. The declared return type names Expressions::Minus, the same operation as the node constructed in the body. */ +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Minus > +operator -( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +{ + return 
Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Minus >( a ); +} + +//// +// Abs +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Abs > +abs( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + /* Returns a unary node with the Abs operation built from the binary expression a. */ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Abs >( a ); +} + +/** abs of a unary expression; returns a unary node with the Abs operation built from a. */ +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Abs > +abs( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Abs >( a ); +} + +//// +// Sin +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Sin > +sin( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + /* Returns a unary node with the Sin operation built from the binary expression a. */ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Sin >( a ); +} + +/** sin of a unary expression; returns a unary node with the Sin operation built from a. */ +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Sin > +sin( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::StaticUnaryExpressionTemplate< + 
Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Sin >( a ); +} + +//// +// Cos +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Cos > +cos( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + /* Returns a unary node with the Cos operation built from the binary expression a. */ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Cos >( a ); +} + +/** cos of a unary expression; returns a unary node with the Cos operation built from a. */ +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Cos > +cos( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Cos >( a ); +} + +//// +// Tan +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Tan > +tan( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + /* Returns a unary node with the Tan operation built from the binary expression a. */ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Tan >( a ); +} + +/** tan of a unary expression; returns a unary node with the Tan operation built from a. */ +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Tan > +tan( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Tan >( a 
); +} + +//// +// Sqrt +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Sqrt > +sqrt( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Sqrt >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Sqrt > +sqrt( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Sqrt >( a ); +} + +//// +// Cbrt +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Cbrt > +cbrt( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Cbrt >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Cbrt > +cbrt( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Cbrt >( a ); +} + +//// +// Pow +template< typename L1, + typename L2, + template< 
typename, typename > class LOperation,
+          typename Real >
+__cuda_callable__
+const Expressions::StaticUnaryExpressionTemplate<
+   Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >,
+   Expressions::Pow >
+pow( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, const Real& exp )
+{
+   auto e = Expressions::StaticUnaryExpressionTemplate<
+      Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >,
+      Expressions::Pow >( a );
+   // The exponent is handed to the expression through its parameter member.
+   e.parameter.set( exp );
+   return e;
+}
+
+// Builds a Pow unary expression on top of the unary sub-expression `a`
+// and stores the exponent `exp` in the expression's parameter member.
+template< typename L1,
+          template< typename > class LOperation,
+          typename Real >
+__cuda_callable__
+const Expressions::StaticUnaryExpressionTemplate<
+   Expressions::StaticUnaryExpressionTemplate< L1, LOperation >,
+   Expressions::Pow >
+pow( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, const Real& exp )
+{
+   auto e = Expressions::StaticUnaryExpressionTemplate<
+      Expressions::StaticUnaryExpressionTemplate< L1, LOperation >,
+      Expressions::Pow >( a );
+   e.parameter.set( exp );
+   return e;
+}
+
+////
+// Floor
+// Builds a Floor unary expression on top of the binary sub-expression `a`.
+// The declared return type names Expressions::Floor so that it matches the
+// object constructed in the body.
+template< typename L1,
+          typename L2,
+          template< typename, typename > class LOperation >
+__cuda_callable__
+const Expressions::StaticUnaryExpressionTemplate<
+   Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >,
+   Expressions::Floor >
+floor( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a )
+{
+   return Expressions::StaticUnaryExpressionTemplate<
+      Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >,
+      Expressions::Floor >( a );
+}
+
+// Builds a Floor unary expression on top of the unary sub-expression `a`.
+template< typename L1,
+          template< typename > class LOperation >
+__cuda_callable__
+const Expressions::StaticUnaryExpressionTemplate<
+   Expressions::StaticUnaryExpressionTemplate< L1, LOperation >,
+   Expressions::Floor >
+floor( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a )
+{
+   return Expressions::StaticUnaryExpressionTemplate<
+      Expressions::StaticUnaryExpressionTemplate< L1, LOperation >,
+      Expressions::Floor >( a
);
+}
+
+////
+// Ceil
+// Builds a Ceil unary expression on top of the binary sub-expression `a`.
+template< typename L1,
+          typename L2,
+          template< typename, typename > class LOperation >
+__cuda_callable__
+const Expressions::StaticUnaryExpressionTemplate<
+   Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >,
+   Expressions::Ceil >
+ceil( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a )
+{
+   return Expressions::StaticUnaryExpressionTemplate<
+      Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >,
+      Expressions::Ceil >( a );
+}
+
+// Builds a Ceil unary expression on top of the unary sub-expression `a`.
+// This overload must be called `ceil`: under any other name it would
+// redeclare a different function with a conflicting return type.
+template< typename L1,
+          template< typename > class LOperation >
+__cuda_callable__
+const Expressions::StaticUnaryExpressionTemplate<
+   Expressions::StaticUnaryExpressionTemplate< L1, LOperation >,
+   Expressions::Ceil >
+ceil( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a )
+{
+   return Expressions::StaticUnaryExpressionTemplate<
+      Expressions::StaticUnaryExpressionTemplate< L1, LOperation >,
+      Expressions::Ceil >( a );
+}
+
+////
+// Asin
+// Builds an Asin unary expression on top of the binary sub-expression `a`.
+template< typename L1,
+          typename L2,
+          template< typename, typename > class LOperation >
+__cuda_callable__
+const Expressions::StaticUnaryExpressionTemplate<
+   Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >,
+   Expressions::Asin >
+asin( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a )
+{
+   return Expressions::StaticUnaryExpressionTemplate<
+      Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >,
+      Expressions::Asin >( a );
+}
+
+// Builds an Asin unary expression on top of the unary sub-expression `a`.
+template< typename L1,
+          template< typename > class LOperation >
+__cuda_callable__
+const Expressions::StaticUnaryExpressionTemplate<
+   Expressions::StaticUnaryExpressionTemplate< L1, LOperation >,
+   Expressions::Asin >
+asin( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a )
+{
+   return Expressions::StaticUnaryExpressionTemplate<
+      Expressions::StaticUnaryExpressionTemplate< L1, LOperation >,
+      Expressions::Asin >( a );
+}
+
+////
+// Acos
+template< typename L1,
+          typename L2,
+          template<
typename, typename > class LOperation >
+__cuda_callable__
+const Expressions::StaticUnaryExpressionTemplate<
+   Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >,
+   Expressions::Acos >
+acos( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a )
+{
+   // Binary-operand overload of acos; its name must be `acos` so that it
+   // does not collide with the cos overload for the same argument type.
+   return Expressions::StaticUnaryExpressionTemplate<
+      Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >,
+      Expressions::Acos >( a );
+}
+
+// Builds an Acos unary expression on top of the unary sub-expression `a`.
+// The constructed object is tagged Expressions::Acos, matching the declared
+// return type.
+template< typename L1,
+          template< typename > class LOperation >
+__cuda_callable__
+const Expressions::StaticUnaryExpressionTemplate<
+   Expressions::StaticUnaryExpressionTemplate< L1, LOperation >,
+   Expressions::Acos >
+acos( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a )
+{
+   return Expressions::StaticUnaryExpressionTemplate<
+      Expressions::StaticUnaryExpressionTemplate< L1, LOperation >,
+      Expressions::Acos >( a );
+}
+
+////
+// Atan
+// Builds an Atan unary expression on top of the binary sub-expression `a`.
+// The overload is named `atan` so that it does not collide with the tan
+// overload for the same argument type.
+template< typename L1,
+          typename L2,
+          template< typename, typename > class LOperation >
+__cuda_callable__
+const Expressions::StaticUnaryExpressionTemplate<
+   Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >,
+   Expressions::Atan >
+atan( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a )
+{
+   return Expressions::StaticUnaryExpressionTemplate<
+      Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >,
+      Expressions::Atan >( a );
+}
+
+// Builds an Atan unary expression on top of the unary sub-expression `a`.
+template< typename L1,
+          template< typename > class LOperation >
+__cuda_callable__
+const Expressions::StaticUnaryExpressionTemplate<
+   Expressions::StaticUnaryExpressionTemplate< L1, LOperation >,
+   Expressions::Atan >
+atan( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a )
+{
+   return Expressions::StaticUnaryExpressionTemplate<
+      Expressions::StaticUnaryExpressionTemplate< L1, LOperation >,
+      Expressions::Atan >( a );
+}
+
+////
+// Sinh
+template< typename L1,
+          typename L2,
+          template< typename, typename > class LOperation >
+__cuda_callable__
+const
Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Sinh > +sinh( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Sinh >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Sinh > +sinh( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Sinh >( a ); +} + +//// +// Cosh +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Cosh > +cosh( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Cosh >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Cosh > +cosh( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Cosh >( a ); +} + +//// +// Tanh +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + 
Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >,
+   Expressions::Tanh >
+tanh( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a )
+{
+   // Binary-operand overload of tanh; its name must be `tanh` so that it
+   // does not collide with the cosh overload for the same argument type.
+   return Expressions::StaticUnaryExpressionTemplate<
+      Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >,
+      Expressions::Tanh >( a );
+}
+
+// Builds a Tanh unary expression on top of the unary sub-expression `a`.
+template< typename L1,
+          template< typename > class LOperation >
+__cuda_callable__
+const Expressions::StaticUnaryExpressionTemplate<
+   Expressions::StaticUnaryExpressionTemplate< L1, LOperation >,
+   Expressions::Tanh >
+tanh( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a )
+{
+   return Expressions::StaticUnaryExpressionTemplate<
+      Expressions::StaticUnaryExpressionTemplate< L1, LOperation >,
+      Expressions::Tanh >( a );
+}
+
+////
+// Log
+// Builds a Log unary expression on top of the binary sub-expression `a`.
+template< typename L1,
+          typename L2,
+          template< typename, typename > class LOperation >
+__cuda_callable__
+const Expressions::StaticUnaryExpressionTemplate<
+   Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >,
+   Expressions::Log >
+log( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a )
+{
+   return Expressions::StaticUnaryExpressionTemplate<
+      Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >,
+      Expressions::Log >( a );
+}
+
+// Builds a Log unary expression on top of the unary sub-expression `a`.
+template< typename L1,
+          template< typename > class LOperation >
+__cuda_callable__
+const Expressions::StaticUnaryExpressionTemplate<
+   Expressions::StaticUnaryExpressionTemplate< L1, LOperation >,
+   Expressions::Log >
+log( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a )
+{
+   return Expressions::StaticUnaryExpressionTemplate<
+      Expressions::StaticUnaryExpressionTemplate< L1, LOperation >,
+      Expressions::Log >( a );
+}
+
+////
+// Log10
+template< typename L1,
+          typename L2,
+          template< typename, typename > class LOperation >
+__cuda_callable__
+const Expressions::StaticUnaryExpressionTemplate<
+   Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >,
+
Expressions::Log10 > +log10( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Log10 >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Log10 > +log10( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Log10 >( a ); +} + +//// +// Log2 +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Log2 > +log2( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Log2 >( a ); +} + +template< typename L1, + template< typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Log2 > +log2( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +{ + return Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Expressions::Log2 >( a ); +} + +//// +// Exp +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +const Expressions::StaticUnaryExpressionTemplate< + Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Expressions::Exp > +exp( const 
Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a )
+{
+   return Expressions::StaticUnaryExpressionTemplate<
+      Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >,
+      Expressions::Exp >( a );
+}
+
+// Builds an Exp unary expression on top of the unary sub-expression `a`.
+template< typename L1,
+          template< typename > class LOperation >
+__cuda_callable__
+const Expressions::StaticUnaryExpressionTemplate<
+   Expressions::StaticUnaryExpressionTemplate< L1, LOperation >,
+   Expressions::Exp >
+exp( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a )
+{
+   return Expressions::StaticUnaryExpressionTemplate<
+      Expressions::StaticUnaryExpressionTemplate< L1, LOperation >,
+      Expressions::Exp >( a );
+}
+
+////
+// Vertical operations - min
+// Returns the smallest value obtained by indexing the expression `a` from
+// 0 to a.getSize() - 1. The function previously had no return statement at
+// all, which is undefined behaviour for a value-returning function.
+// NOTE(review): assumes the expression has at least one element, since
+// a[ 0 ] is read unconditionally - confirm static expressions are never empty.
+// TODO: an earlier sketch used a Reduction-based implementation; revisit
+// once the required types are available here.
+template< typename L1,
+          typename L2,
+          template< typename, typename > class LOperation >
+typename Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType
+min( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a )
+{
+   using ExpressionType = Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >;
+   using RealType = typename ExpressionType::RealType;
+
+   RealType result = a[ 0 ];
+   for( int i = 1; i < a.getSize(); i++ )
+   {
+      const RealType value = a[ i ];
+      if( value < result )
+         result = value;
+   }
+   return result;
+}
+
+
+////
+// Output stream
+template< typename T1,
+          typename T2,
+          template< typename, typename > class Operation >
+std::ostream& operator << ( std::ostream& str, const StaticBinaryExpressionTemplate< T1, T2, Operation >& expression )
+{
+   str << "[ ";
+   for( int i = 0; i < expression.getSize() - 1; i++ )
+      str << expression[ i ] << ", ";
+   str << expression[
expression.getSize() - 1 ] << " ]"; + return str; +} + +template< typename T, + template< typename > class Operation, + typename Parameter > +std::ostream& operator << ( std::ostream& str, const StaticUnaryExpressionTemplate< T, Operation, Parameter >& expression ) +{ + str << "[ "; + for( int i = 0; i < expression.getSize() - 1; i++ ) + str << expression[ i ] << ", "; + str << expression[ expression.getSize() - 1 ] << " ]"; + return str; +} + } //namespace Expressions + } //namespace Containers +} // namespace TNL diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h index 00c3b8022..9f785de45 100644 --- a/src/TNL/Containers/StaticVector.h +++ b/src/TNL/Containers/StaticVector.h @@ -12,7 +12,7 @@ #include #include -#include +#include namespace TNL { namespace Containers { @@ -82,12 +82,12 @@ class StaticVector : public StaticArray< Size, Real > typename T2, template< typename, typename > class Operation > __cuda_callable__ - StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ); + StaticVector( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& op ); template< typename T, template< typename > class Operation > __cuda_callable__ - StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ); + StaticVector( const Expressions::StaticUnaryExpressionTemplate< T, Operation >& op ); /** * \brief Sets up a new (vector) parameter which means it can have more elements. 
@@ -267,12 +267,12 @@ class StaticVector< 1, Real > : public StaticArray< 1, Real > typename T2, template< typename, typename > class Operation > __cuda_callable__ - StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ); + StaticVector( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& op ); template< typename T, template< typename > class Operation > __cuda_callable__ - StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ); + StaticVector( const Expressions::StaticUnaryExpressionTemplate< T, Operation >& op ); bool setup( const Config::ParameterContainer& parameters, const String& prefix = "" ); @@ -397,12 +397,12 @@ class StaticVector< 2, Real > : public StaticArray< 2, Real > typename T2, template< typename, typename > class Operation > __cuda_callable__ - StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ); + StaticVector( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& op ); template< typename T, template< typename > class Operation > __cuda_callable__ - StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ); + StaticVector( const Expressions::StaticUnaryExpressionTemplate< T, Operation >& op ); bool setup( const Config::ParameterContainer& parameters, const String& prefix = "" ); @@ -528,12 +528,12 @@ class StaticVector< 3, Real > : public StaticArray< 3, Real > typename T2, template< typename, typename > class Operation > __cuda_callable__ - StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ); + StaticVector( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& op ); template< typename T, template< typename > class Operation > __cuda_callable__ - StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ); + StaticVector( const Expressions::StaticUnaryExpressionTemplate< T, Operation >& op ); bool setup( const 
Config::ParameterContainer& parameters, const String& prefix = "" ); diff --git a/src/TNL/Containers/StaticVector1D_impl.h b/src/TNL/Containers/StaticVector1D_impl.h index e6e7ae4e7..6ad34ac6f 100644 --- a/src/TNL/Containers/StaticVector1D_impl.h +++ b/src/TNL/Containers/StaticVector1D_impl.h @@ -56,20 +56,18 @@ template< typename Real > typename T2, template< typename, typename > class Operation > __cuda_callable__ -StaticVector< 1, Real >::StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ) +StaticVector< 1, Real >::StaticVector( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& op ) { - static_assert( Expressions::BinaryExpressionTemplate< T1, T2, Operation >::isStatic(), "Attempt to assign non-static expression to static vector." ); - Algorithms::VectorAssignment< StaticVector< 1, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, op ); + Algorithms::VectorAssignment< StaticVector< 1, Real >, Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, op ); }; template< typename Real > template< typename T, template< typename > class Operation > __cuda_callable__ -StaticVector< 1, Real >::StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ) +StaticVector< 1, Real >::StaticVector( const Expressions::StaticUnaryExpressionTemplate< T, Operation >& op ) { - static_assert( Expressions::UnaryExpressionTemplate< T, Operation >::isStatic(), "Attempt to assign non-static expression to static vector." 
); - Algorithms::VectorAssignment< StaticVector< 1, Real >, Expressions::UnaryExpressionTemplate< T, Operation > >::assignStatic( *this, op ); + Algorithms::VectorAssignment< StaticVector< 1, Real >, Expressions::StaticUnaryExpressionTemplate< T, Operation > >::assignStatic( *this, op ); }; template< typename Real > diff --git a/src/TNL/Containers/StaticVector2D_impl.h b/src/TNL/Containers/StaticVector2D_impl.h index 3b0f28951..2ee5aa831 100644 --- a/src/TNL/Containers/StaticVector2D_impl.h +++ b/src/TNL/Containers/StaticVector2D_impl.h @@ -62,20 +62,18 @@ template< typename Real > typename T2, template< typename, typename > class Operation > __cuda_callable__ -StaticVector< 2, Real >::StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ) +StaticVector< 2, Real >::StaticVector( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& op ) { - static_assert( Expressions::BinaryExpressionTemplate< T1, T2, Operation >::isStatic(), "Attempt to assign non-static expression to static vector." ); - Algorithms::VectorAssignment< StaticVector< 2, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, op ); + Algorithms::VectorAssignment< StaticVector< 2, Real >, Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, op ); }; template< typename Real > template< typename T, template< typename > class Operation > __cuda_callable__ -StaticVector< 2, Real >::StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ) +StaticVector< 2, Real >::StaticVector( const Expressions::StaticUnaryExpressionTemplate< T, Operation >& op ) { - static_assert( Expressions::UnaryExpressionTemplate< T, Operation >::isStatic(), "Attempt to assign non-static expression to static vector." 
); - Algorithms::VectorAssignment< StaticVector< 2, Real >, Expressions::UnaryExpressionTemplate< T, Operation > >::assignStatic( *this, op ); + Algorithms::VectorAssignment< StaticVector< 2, Real >, Expressions::StaticUnaryExpressionTemplate< T, Operation > >::assignStatic( *this, op ); }; template< typename Real > diff --git a/src/TNL/Containers/StaticVector3D_impl.h b/src/TNL/Containers/StaticVector3D_impl.h index 8475fb035..2847c407c 100644 --- a/src/TNL/Containers/StaticVector3D_impl.h +++ b/src/TNL/Containers/StaticVector3D_impl.h @@ -62,20 +62,18 @@ template< typename Real > typename T2, template< typename, typename > class Operation > __cuda_callable__ -StaticVector< 3, Real >::StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ) +StaticVector< 3, Real >::StaticVector( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& op ) { - static_assert( Expressions::BinaryExpressionTemplate< T1, T2, Operation >::isStatic(), "Attempt to assign non-static expression to static vector." ); - Algorithms::VectorAssignment< StaticVector< 3, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, op ); + Algorithms::VectorAssignment< StaticVector< 3, Real >, Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, op ); }; template< typename Real > template< typename T, template< typename > class Operation > __cuda_callable__ -StaticVector< 3, Real >::StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ) +StaticVector< 3, Real >::StaticVector( const Expressions::StaticUnaryExpressionTemplate< T, Operation >& op ) { - static_assert( Expressions::UnaryExpressionTemplate< T, Operation >::isStatic(), "Attempt to assign non-static expression to static vector." 
); - Algorithms::VectorAssignment< StaticVector< 3, Real >, Expressions::UnaryExpressionTemplate< T, Operation > >::assignStatic( *this, op ); + Algorithms::VectorAssignment< StaticVector< 3, Real >, Expressions::StaticUnaryExpressionTemplate< T, Operation > >::assignStatic( *this, op ); }; diff --git a/src/TNL/Containers/StaticVectorExpressions.h b/src/TNL/Containers/StaticVectorExpressions.h index 95d52d024..cf4ad298a 100644 --- a/src/TNL/Containers/StaticVectorExpressions.h +++ b/src/TNL/Containers/StaticVectorExpressions.h @@ -10,7 +10,7 @@ #pragma once -#include +#include #include #include @@ -21,156 +21,156 @@ namespace TNL { // Addition template< int Size, typename Real, typename ET > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Addition > +const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Addition > operator+( const StaticVector< Size, Real >& a, const ET& b ) { - return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Addition >( a, b ); + return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Addition >( a, b ); } template< typename ET, int Size, typename Real > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Addition > +const Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Addition > operator+( const ET& a, const StaticVector< Size, Real >& b ) { - return Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Addition >( a, b ); + return Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Addition >( a, b ); } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Addition 
> +const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Addition > operator+( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) { - return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Addition >( a, b ); + return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Addition >( a, b ); } //// // Subtraction template< int Size, typename Real, typename ET > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Subtraction > +const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Subtraction > operator-( const StaticVector< Size, Real >& a, const ET& b ) { - return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Subtraction >( a, b ); + return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Subtraction >( a, b ); } template< typename ET, int Size, typename Real > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Subtraction > +const Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Subtraction > operator-( const ET& a, const StaticVector< Size, Real >& b ) { - return Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Subtraction >( a, b ); + return Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Subtraction >( a, b ); } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Subtraction > +const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, 
StaticVector< Size, Real2 >, Expressions::Subtraction > operator-( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) { - return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Subtraction >( a, b ); + return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Subtraction >( a, b ); } //// // Multiplication template< int Size, typename Real, typename ET > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Multiplication > +const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Multiplication > operator*( const StaticVector< Size, Real >& a, const ET& b ) { - return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Multiplication >( a, b ); + return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Multiplication >( a, b ); } template< typename ET, int Size, typename Real > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Multiplication > +const Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Multiplication > operator*( const ET& a, const StaticVector< Size, Real >& b ) { - return Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Multiplication >( a, b ); + return Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Multiplication >( a, b ); } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Multiplication > +const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, 
Expressions::Multiplication > operator*( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) { - return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Multiplication >( a, b ); + return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Multiplication >( a, b ); } //// // Division template< int Size, typename Real, typename ET > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Division > +const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Division > operator/( const StaticVector< Size, Real >& a, const ET& b ) { - return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Division >( a, b ); + return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Division >( a, b ); } template< typename ET, int Size, typename Real > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Division > +const Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Division > operator/( const ET& a, const StaticVector< Size, Real >& b ) { - return Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Division >( a, b ); + return Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Division >( a, b ); } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Division > +const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Division > operator/( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 
>& b ) { - return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Division >( a, b ); + return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Division >( a, b ); } //// // Min template< int Size, typename Real, typename ET > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Min > +const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Min > min( const StaticVector< Size, Real >& a, const ET& b ) { - return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Min >( a, b ); + return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Min >( a, b ); } template< typename ET, int Size, typename Real > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Min > +const Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Min > min( const ET& a, const StaticVector< Size, Real >& b ) { - return Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Min >( a, b ); + return Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Min >( a, b ); } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Min > +const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Min > min( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) { - return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Min >( a, b ); + return Expressions::StaticBinaryExpressionTemplate< 
StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Min >( a, b ); } //// // Max template< int Size, typename Real, typename ET > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Max > +const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Max > max( const StaticVector< Size, Real >& a, const ET& b ) { - return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Max >( a, b ); + return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Max >( a, b ); } template< typename ET, int Size, typename Real > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Max > +const Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Max > max( const ET& a, const StaticVector< Size, Real >& b ) { - return Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Max >( a, b ); + return Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Max >( a, b ); } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Max > +const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Max > max( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) { - return Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Max >( a, b ); + return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Max >( a, b ); } //// @@ -315,200 +315,200 @@ bool operator>=( const StaticVector< Size, Real1 >& a, const StaticVector< Size, // Minus 
template< int Size, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Minus > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Minus > operator-( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Minus >( a ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Minus >( a ); } //// // Abs template< int Size, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Abs > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Abs > abs( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Abs >( a ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Abs >( a ); } //// // Sine template< int Size, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sin > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sin > sin( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sin >( a ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sin >( a ); } //// // Cosine template< int Size, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cos > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cos > cos( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cos >( a ); + return Expressions::StaticUnaryExpressionTemplate< 
StaticVector< Size, Real >, Expressions::Cos >( a ); } //// // Tangent template< int Size, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Tan > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Tan > tan( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Tan >( a ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Tan >( a ); } //// // Sqrt template< int Size, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sqrt > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sqrt > sqrt( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sqrt >( a ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sqrt >( a ); } //// // Cbrt template< int Size, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cbrt > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cbrt > cbrt( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cbrt >( a ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cbrt >( a ); } //// // Power template< int Size, typename Real, typename ExpType > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Pow, ExpType > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Pow, ExpType > pow( const StaticVector< Size, Real >& a, const ExpType& exp ) { - return 
Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Pow, ExpType >( a, exp ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Pow, ExpType >( a, exp ); } //// // Floor template< int Size, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Floor > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Floor > floor( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Floor >( a ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Floor >( a ); } //// // Ceil template< int Size, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Ceil > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Ceil > ceil( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Ceil >( a ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Ceil >( a ); } //// // Acos template< int Size, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Acos > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Acos > acos( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Acos >( a ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Acos >( a ); } //// // Asin template< int Size, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Asin > +const Expressions::StaticUnaryExpressionTemplate< 
StaticVector< Size, Real >, Expressions::Asin > asin( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Asin >( a ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Asin >( a ); } //// // Atan template< int Size, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Atan > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Atan > atan( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Atan >( a ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Atan >( a ); } //// // Cosh template< int Size, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cosh > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cosh > cosh( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cosh >( a ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cosh >( a ); } //// // Tanh template< int Size, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Tanh > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Tanh > tanh( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Tanh >( a ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Tanh >( a ); } //// // Log template< int Size, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, 
Expressions::Log > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log > log( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log >( a ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log >( a ); } //// // Log10 template< int Size, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log10 > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log10 > log10( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log10 >( a ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log10 >( a ); } //// // Log2 template< int Size, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log2 > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log2 > log2( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log2 >( a ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log2 >( a ); } //// // Exp template< int Size, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Exp > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Exp > exp( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Exp >( a ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Exp >( a ); } //// // Sign template< int Size, typename Real > __cuda_callable__ -const 
Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sign > +const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sign > sign( const StaticVector< Size, Real >& a ) { - return Expressions::UnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sign >( a ); + return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sign >( a ); } @@ -519,25 +519,25 @@ __cuda_callable__ StaticVector< Size, Real > Scale( const StaticVector< Size, Real >& a, const ET& b ) { - StaticVector< Size, Real > result = Expressions::BinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Multiplication >( a, b ); + StaticVector< Size, Real > result = Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Multiplication >( a, b ); return result; } template< typename ET, int Size, typename Real > __cuda_callable__ -Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Multiplication > +Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Multiplication > Scale( const ET& a, const StaticVector< Size, Real >& b ) { - StaticVector< Size, Real > result = Expressions::BinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Multiplication >( a, b ); + StaticVector< Size, Real > result = Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Multiplication >( a, b ); return result; } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Multiplication > +Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Multiplication > Scale( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) { - StaticVector< Size, Real1 > result 
= Expressions::BinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Multiplication >( a, b ); + StaticVector< Size, Real1 > result = Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Multiplication >( a, b ); return result; } diff --git a/src/TNL/Containers/StaticVector_impl.h b/src/TNL/Containers/StaticVector_impl.h index 763fb1e3e..e80b046c7 100644 --- a/src/TNL/Containers/StaticVector_impl.h +++ b/src/TNL/Containers/StaticVector_impl.h @@ -55,10 +55,9 @@ template< int Size, typename Real > template< typename T1, typename T2, template< typename, typename > class Operation > -StaticVector< Size, Real >::StaticVector( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& op ) +StaticVector< Size, Real >::StaticVector( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& op ) { - static_assert( Expressions::BinaryExpressionTemplate< T1, T2, Operation >::isStatic(), "Attempt to assign non-static expression to static vector." ); - Algorithms::VectorAssignment< StaticVector< Size, Real >, Expressions::BinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, op ); + Algorithms::VectorAssignment< StaticVector< Size, Real >, Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, op ); }; template< int Size, @@ -66,10 +65,9 @@ template< int Size, template< typename T, template< typename > class Operation > __cuda_callable__ -StaticVector< Size, Real >::StaticVector( const Expressions::UnaryExpressionTemplate< T, Operation >& op ) +StaticVector< Size, Real >::StaticVector( const Expressions::StaticUnaryExpressionTemplate< T, Operation >& op ) { - static_assert( Expressions::UnaryExpressionTemplate< T, Operation >::isStatic(), "Attempt to assign non-static expression to static vector." 
); - Algorithms::VectorAssignment< StaticVector< Size, Real >, Expressions::UnaryExpressionTemplate< T, Operation > >::assignStatic( *this, op ); + Algorithms::VectorAssignment< StaticVector< Size, Real >, Expressions::StaticUnaryExpressionTemplate< T, Operation > >::assignStatic( *this, op ); }; template< int Size, typename Real > diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index f941d9958..e53309f69 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -62,10 +62,12 @@ public: template< typename T1, typename T2, template< typename, typename > class Operation > + __cuda_callable__ VectorView( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& expression ); template< typename T, template< typename > class Operation > + __cuda_callable__ VectorView( const Expressions::UnaryExpressionTemplate< T, Operation >& expression ); diff --git a/src/UnitTests/Containers/VectorTest-1.h b/src/UnitTests/Containers/VectorTest-1.h index 292a403b4..7e7380a66 100644 --- a/src/UnitTests/Containers/VectorTest-1.h +++ b/src/UnitTests/Containers/VectorTest-1.h @@ -161,6 +161,22 @@ TYPED_TEST( VectorTest, absMin ) EXPECT_EQ( VectorOperations::getVectorAbsMin( v ), 0 ); } +TYPED_TEST( VectorTest, comparison ) +{ + using VectorType = typename TestFixture::VectorType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + const int size = VECTOR_TEST_SIZE; + + VectorType _v( size ), _w( size ); + ViewType v( _v ), w( _w ); + v = 1.0; + w = 2.0; + + EXPECT_TRUE( v < w ); + EXPECT_TRUE( w > v ); + EXPECT_TRUE( w + 1.0 < v + 4.0 ); +} #endif // HAVE_GTEST -- GitLab From e076b9c58c0a7313ad7502a2be060e86b64c2f7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Wed, 1 May 2019 22:48:46 +0200 Subject: [PATCH 24/93] Added expression templates vertical operations. 
--- .../Expressions/VerticalOperations.h | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 src/TNL/Containers/Expressions/VerticalOperations.h diff --git a/src/TNL/Containers/Expressions/VerticalOperations.h b/src/TNL/Containers/Expressions/VerticalOperations.h new file mode 100644 index 000000000..b7698be92 --- /dev/null +++ b/src/TNL/Containers/Expressions/VerticalOperations.h @@ -0,0 +1,107 @@ +/*************************************************************************** + VerticalOperations.h - description + ------------------- + begin : May 1, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include + +//// +// By vertical operations we mean those applied across vector elements or +// vector expression elements. It means for example minim/maximum of all +// vector elements etc. 
+ +namespace TNL { + namespace Containers { + namespace Expressions { + +template< typename T > +__cuda_callable__ +auto StaticExpressionMin( const T& a ) -> decltype( a[ 0 ] ) +{ + auto aux = a[ 0 ]; + for( int i = 1; i < a.getSize(); i++ ) + aux = TNL::min( aux, a[ i ] ); + return aux; +} + +template< typename T > +__cuda_callable__ +auto StaticExpressionMax( const T& a ) -> decltype( a[ 0 ] ) +{ + auto aux = a[ 0 ]; + for( int i = 1; i < a.getSize(); i++ ) + aux = TNL::max( aux, a[ i ] ); + return aux; +} + +template< typename T > +__cuda_callable__ +auto StaticExpressionSum( const T& a ) -> decltype( a[ 0 ] ) +{ + auto aux = a[ 0 ]; + for( int i = 1; i < a.getSize(); i++ ) + aux += a[ i ]; + return aux; +} + +template< typename T > +__cuda_callable__ +auto StaticExpressionProduct( const T& a ) -> decltype( a[ 0 ] ) +{ + auto aux = a[ 0 ]; + for( int i = 1; i < a.getSize(); i++ ) + aux *= a[ i ]; + return aux; +} + +template< typename T > +__cuda_callable__ +bool StaticExpressionLogicalAnd( const T& a ) +{ + auto aux = a[ 0 ]; + for( int i = 1; i < a.getSize(); i++ ) + aux = aux && a[ i ]; + return aux; +} + +template< typename T > +__cuda_callable__ +bool StaticExpressionLogicalOr( const T& a ) +{ + auto aux = a[ 0 ]; + for( int i = 1; i < a.getSize(); i++ ) + aux = aux || a[ i ]; + return aux; +} + +template< typename T > +__cuda_callable__ +auto StaticExpressionBinaryAnd( const T& a ) -> decltype( a[ 0 ] ) +{ + auto aux = a[ 0 ]; + for( int i = 1; i < a.getSize(); i++ ) + aux = aux & a[ i ]; + return aux; +} + +template< typename T > +__cuda_callable__ +auto StaticExpressionBinaryOr( const T& a ) -> decltype( a[ 0 ] ) +{ + auto aux = a[ 0 ]; + for( int i = 1; i < a.getSize(); i++ ) + aux = aux | a[ i ]; + return aux; +} + + } //namespace Expressions + } // namespace Containers +} // namespace TNL -- GitLab From 449862f38190efd35f30cea9a1d1f98b320cffbe Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 2 May 2019 19:50:21 +0200 Subject: [PATCH 25/93] 
Implementing vertical vector operations. --- .../Expressions/ExpressionTemplates.h | 119 ++++++++++++++++-- .../Expressions/StaticExpressionTemplates.h | 118 +++++++++++++++-- .../Expressions/VerticalOperations.h | 110 +++++++++++++++- src/UnitTests/Containers/StaticVectorTest.cpp | 3 +- 4 files changed, 332 insertions(+), 18 deletions(-) diff --git a/src/TNL/Containers/Expressions/ExpressionTemplates.h b/src/TNL/Containers/Expressions/ExpressionTemplates.h index 194b707e4..9ea979766 100644 --- a/src/TNL/Containers/Expressions/ExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/ExpressionTemplates.h @@ -50,7 +50,7 @@ struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariab using DeviceType = typename T1::DeviceType; using IndexType = typename T1::IndexType; using IsExpressionTemplate = bool; - + static_assert( std::is_same< typename T1::DeviceType, typename T2::DeviceType >::value, "Attempt to mix operands allocated on different device types." ); static_assert( IsStaticType< T1 >::value == IsStaticType< T2 >::value, "Attempt to mix static and non-static operands in binary expression templates." 
); static constexpr bool isStatic() { return false; } @@ -1637,19 +1637,122 @@ exp( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) template< typename L1, typename L2, template< typename, typename > class LOperation > +__cuda_callable__ typename Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType min( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { -/* using ExpressionType = Expressions::BinaryExpressionTemplate< L1, L2, LOperation >; - using RealType = typename ExpressionType::RealType; - using IndexType = typename ExpressionType::IndexType; + return ExpressionMin( a ); +} + +template< typename L1, + template< typename > class LOperation, + typename Parameter > +__cuda_callable__ +typename Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +min( const Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +{ + return ExpressionMin( a ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +typename Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType +max( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return ExpressionMax( a ); +} + +template< typename L1, + template< typename > class LOperation, + typename Parameter > +__cuda_callable__ +typename Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +max( const Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +{ + return ExpressionMax( a ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +typename Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType +sum( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return ExpressionSum( a ); +} + +template< typename L1, + template< typename > class LOperation, + typename Parameter > +__cuda_callable__ +typename 
Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +sum( const Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +{ + return ExpressionSum( a ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +typename Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType +product( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return ExpressionProduct( a ); +} - auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; - auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::min( a, b ); }; - auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::min( a, b ); }; - return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() );*/ +template< typename L1, + template< typename > class LOperation, + typename Parameter > +__cuda_callable__ +typename Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +product( const Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +{ + return ExpressionProduct( a ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +typename Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType +logicalOr( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return ExpressionLogicalOr( a ); } +template< typename L1, + template< typename > class LOperation, + typename Parameter > +__cuda_callable__ +typename Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +logicalOr( const Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +{ + return ExpressionLogicalOr( a ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation 
> +__cuda_callable__ +typename Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType +binaryOr( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return ExpressionBinaryOr( a ); +} + +template< typename L1, + template< typename > class LOperation, + typename Parameter > +__cuda_callable__ +typename Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +binaryOr( const Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +{ + return ExpressionBinaryOr( a ); +} //// // Output stream diff --git a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h index 2d936dc9a..7292c733b 100644 --- a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h @@ -15,6 +15,7 @@ #include #include #include +#include namespace TNL { namespace Containers { @@ -1653,19 +1654,122 @@ exp( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) template< typename L1, typename L2, template< typename, typename > class LOperation > +__cuda_callable__ typename Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType min( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { -/* using ExpressionType = Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >; - using RealType = typename ExpressionType::RealType; - using IndexType = typename ExpressionType::IndexType; + return StaticExpressionMin( a ); +} - auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; - auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::min( a, b ); }; - auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::min( a, b ); }; - return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< 
ResultType >::max() );*/ +template< typename L1, + template< typename > class LOperation, + typename Parameter > +__cuda_callable__ +typename Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +min( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +{ + return StaticExpressionMin( a ); } +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +typename Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType +max( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return StaticExpressionMax( a ); +} + +template< typename L1, + template< typename > class LOperation, + typename Parameter > +__cuda_callable__ +typename Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +max( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +{ + return StaticExpressionMax( a ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +typename Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType +sum( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return StaticExpressionSum( a ); +} + +template< typename L1, + template< typename > class LOperation, + typename Parameter > +__cuda_callable__ +typename Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +sum( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +{ + return StaticExpressionSum( a ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +typename Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType +product( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return StaticExpressionProduct( a ); +} + +template< 
typename L1, + template< typename > class LOperation, + typename Parameter > +__cuda_callable__ +typename Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +product( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +{ + return StaticExpressionProduct( a ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +typename Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType +logicalOr( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return StaticExpressionLogicalOr( a ); +} + +template< typename L1, + template< typename > class LOperation, + typename Parameter > +__cuda_callable__ +typename Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +logicalOr( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +{ + return StaticExpressionLogicalOr( a ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation > +__cuda_callable__ +typename Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType +binaryOr( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +{ + return StaticExpressionBinaryOr( a ); +} + +template< typename L1, + template< typename > class LOperation, + typename Parameter > +__cuda_callable__ +typename Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +binaryOr( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +{ + return StaticExpressionBinaryOr( a ); +} //// // Output stream diff --git a/src/TNL/Containers/Expressions/VerticalOperations.h b/src/TNL/Containers/Expressions/VerticalOperations.h index b7698be92..a4ae2b010 100644 --- a/src/TNL/Containers/Expressions/VerticalOperations.h +++ b/src/TNL/Containers/Expressions/VerticalOperations.h @@ -15,9 +15,8 @@ //// // By 
vertical operations we mean those applied across vector elements or -// vector expression elements. It means for example minim/maximum of all +// vector expression elements. It means for example minim/maximum of all // vector elements etc. - namespace TNL { namespace Containers { namespace Expressions { @@ -102,6 +101,113 @@ auto StaticExpressionBinaryOr( const T& a ) -> decltype( a[ 0 ] ) return aux; } +//// +// Non-static operations +template< typename Expression > +__cuda_callable__ +auto ExpressionMin( const Expression& a ) -> decltype( a[ 0 ] ) +{ + using ResultType = decltype( a[ 0 ] ); + using IndexType = typename Expression::IndexType; + + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; + auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = a < b ? a : b; }; + auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = a < b ? a : b; }; + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() ); +} + +template< typename Expression > +__cuda_callable__ +auto ExpressionMax( const Expression& a ) -> decltype( a[ 0 ] ) +{ + using ResultType = decltype( a[ 0 ] ); + using IndexType = typename Expression::IndexType; + + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; + auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = a > b ? a : b; }; + auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = a > b ? 
a : b; }; + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() ); +} + +template< typename Expression > +__cuda_callable__ +auto ExpressionSum( const Expression& a ) -> decltype( a[ 0 ] ) +{ + using ResultType = decltype( a[ 0 ] ); + using IndexType = typename Expression::IndexType; + + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; + auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; + auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0.0 ); +} + +template< typename Expression > +__cuda_callable__ +auto ExpressionProduct( const Expression& a ) -> decltype( a[ 0 ] ) +{ + using ResultType = decltype( a[ 0 ] ); + using IndexType = typename Expression::IndexType; + + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; + auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a *= b; }; + auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a *= b; }; + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 1.0 ); +} + +template< typename Expression > +__cuda_callable__ +bool ExpressionLogicalAnd( const Expression& a ) +{ + using ResultType = decltype( a[ 0 ] ); + using IndexType = typename Expression::IndexType; + + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; + auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = a && b; }; + auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = a && b; }; + return Algorithms::Reduction< 
typename Expression::DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, true ); +} + +template< typename Expression > +__cuda_callable__ +bool ExpressionLogicalOr( const Expression& a ) +{ + using ResultType = decltype( a[ 0 ] ); + using IndexType = typename Expression::IndexType; + + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; + auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = a || b; }; + auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = a || b; }; + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, ); +} + +template< typename Expression > +__cuda_callable__ +auto ExpressionBinaryAnd( const Expression& a ) -> decltype( a[ 0 ] ) +{ + using ResultType = decltype( a[ 0 ] ); + using IndexType = typename Expression::IndexType; + + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; + auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = a & b; }; + auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = a & b; }; + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() ); +} + +template< typename Expression > +__cuda_callable__ +auto ExpressionBinaryOr( const Expression& a ) -> decltype( a[ 0 ] ) +{ + using ResultType = decltype( a[ 0 ] ); + using IndexType = typename Expression::IndexType; + + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; + auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = a | b; }; + auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = a | b; }; + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( a.getSize(), 
reduction, volatileReduction, fetch, ( ResultType ) 0 ); + +} + } //namespace Expressions } // namespace Containers } // namespace TNL diff --git a/src/UnitTests/Containers/StaticVectorTest.cpp b/src/UnitTests/Containers/StaticVectorTest.cpp index 1e1d00d7d..19093996f 100644 --- a/src/UnitTests/Containers/StaticVectorTest.cpp +++ b/src/UnitTests/Containers/StaticVectorTest.cpp @@ -266,7 +266,8 @@ TYPED_TEST( StaticVectorTest, cbrt ) v[ i ] = cbrt( u[ i ] ); } - EXPECT_EQ( cbrt( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( cbrt( u )[ i ], v[ i ], 1.0e-6 ); } TYPED_TEST( StaticVectorTest, pow ) -- GitLab From ef637fc84b77618637e622f8a0482c8638058c53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 2 May 2019 21:50:41 +0200 Subject: [PATCH 26/93] Adding unit tests for vertical operations. --- .../Expressions/VerticalOperations.h | 3 +- src/UnitTests/Containers/CMakeLists.txt | 14 + src/UnitTests/Containers/VectorTest-1.h | 2 +- src/UnitTests/Containers/VectorTest-2.h | 2 +- src/UnitTests/Containers/VectorTest-3.h | 2 +- src/UnitTests/Containers/VectorTest-4.h | 2 +- src/UnitTests/Containers/VectorTest-5.h | 345 +--------------- src/UnitTests/Containers/VectorTest-6.cpp | 11 + src/UnitTests/Containers/VectorTest-6.cu | 11 + src/UnitTests/Containers/VectorTest-6.h | 391 ++++++++++++++++++ src/UnitTests/Containers/VectorTest-7.cpp | 11 + src/UnitTests/Containers/VectorTest-7.cu | 11 + src/UnitTests/Containers/VectorTest-7.h | 68 +++ 13 files changed, 523 insertions(+), 350 deletions(-) create mode 100644 src/UnitTests/Containers/VectorTest-6.cpp create mode 100644 src/UnitTests/Containers/VectorTest-6.cu create mode 100644 src/UnitTests/Containers/VectorTest-6.h create mode 100644 src/UnitTests/Containers/VectorTest-7.cpp create mode 100644 src/UnitTests/Containers/VectorTest-7.cu create mode 100644 src/UnitTests/Containers/VectorTest-7.h diff --git a/src/TNL/Containers/Expressions/VerticalOperations.h 
b/src/TNL/Containers/Expressions/VerticalOperations.h index a4ae2b010..250659d1a 100644 --- a/src/TNL/Containers/Expressions/VerticalOperations.h +++ b/src/TNL/Containers/Expressions/VerticalOperations.h @@ -178,7 +178,7 @@ bool ExpressionLogicalOr( const Expression& a ) auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = a || b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = a || b; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ); } template< typename Expression > @@ -205,7 +205,6 @@ auto ExpressionBinaryOr( const Expression& a ) -> decltype( a[ 0 ] ) auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = a | b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = a | b; }; return Algorithms::Reduction< typename Expression::DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ); - } } //namespace Expressions diff --git a/src/UnitTests/Containers/CMakeLists.txt b/src/UnitTests/Containers/CMakeLists.txt index a5b81a698..24cae4dc0 100644 --- a/src/UnitTests/Containers/CMakeLists.txt +++ b/src/UnitTests/Containers/CMakeLists.txt @@ -27,16 +27,22 @@ ADD_EXECUTABLE( VectorTest-2 VectorTest-2.cpp ) ADD_EXECUTABLE( VectorTest-3 VectorTest-3.cpp ) ADD_EXECUTABLE( VectorTest-4 VectorTest-4.cpp ) ADD_EXECUTABLE( VectorTest-5 VectorTest-5.cpp ) +ADD_EXECUTABLE( VectorTest-6 VectorTest-6.cpp ) +ADD_EXECUTABLE( VectorTest-7 VectorTest-7.cpp ) TARGET_COMPILE_OPTIONS( VectorTest-1 PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_COMPILE_OPTIONS( VectorTest-2 PRIVATE ${CXX_TESTS_FLAGS} ) 
TARGET_COMPILE_OPTIONS( VectorTest-3 PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_COMPILE_OPTIONS( VectorTest-4 PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_COMPILE_OPTIONS( VectorTest-5 PRIVATE ${CXX_TESTS_FLAGS} ) +TARGET_COMPILE_OPTIONS( VectorTest-6 PRIVATE ${CXX_TESTS_FLAGS} ) +TARGET_COMPILE_OPTIONS( VectorTest-7 PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( VectorTest-1 ${GTEST_BOTH_LIBRARIES} ) TARGET_LINK_LIBRARIES( VectorTest-2 ${GTEST_BOTH_LIBRARIES} ) TARGET_LINK_LIBRARIES( VectorTest-3 ${GTEST_BOTH_LIBRARIES} ) TARGET_LINK_LIBRARIES( VectorTest-4 ${GTEST_BOTH_LIBRARIES} ) TARGET_LINK_LIBRARIES( VectorTest-5 ${GTEST_BOTH_LIBRARIES} ) +TARGET_LINK_LIBRARIES( VectorTest-6 ${GTEST_BOTH_LIBRARIES} ) +TARGET_LINK_LIBRARIES( VectorTest-7 ${GTEST_BOTH_LIBRARIES} ) IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( ArrayTestCuda ArrayTest.cu @@ -52,11 +58,15 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( VectorTestCuda-3 VectorTest-3.cu OPTIONS ${CXX_TESTS_FLAGS} ) CUDA_ADD_EXECUTABLE( VectorTestCuda-4 VectorTest-4.cu OPTIONS ${CXX_TESTS_FLAGS} ) CUDA_ADD_EXECUTABLE( VectorTestCuda-5 VectorTest-5.cu OPTIONS ${CXX_TESTS_FLAGS} ) + CUDA_ADD_EXECUTABLE( VectorTestCuda-6 VectorTest-6.cu OPTIONS ${CXX_TESTS_FLAGS} ) + CUDA_ADD_EXECUTABLE( VectorTestCuda-7 VectorTest-7.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( VectorTestCuda-1 ${GTEST_BOTH_LIBRARIES} ) TARGET_LINK_LIBRARIES( VectorTestCuda-2 ${GTEST_BOTH_LIBRARIES} ) TARGET_LINK_LIBRARIES( VectorTestCuda-3 ${GTEST_BOTH_LIBRARIES} ) TARGET_LINK_LIBRARIES( VectorTestCuda-4 ${GTEST_BOTH_LIBRARIES} ) TARGET_LINK_LIBRARIES( VectorTestCuda-5 ${GTEST_BOTH_LIBRARIES} ) + TARGET_LINK_LIBRARIES( VectorTestCuda-6 ${GTEST_BOTH_LIBRARIES} ) + TARGET_LINK_LIBRARIES( VectorTestCuda-7 ${GTEST_BOTH_LIBRARIES} ) ENDIF( BUILD_CUDA ) IF( BUILD_CUDA ) @@ -87,6 +97,8 @@ ADD_TEST( VectorTest-2 ${EXECUTABLE_OUTPUT_PATH}/VectorTest-2${CMAKE_EXECUTABLE_ ADD_TEST( VectorTest-3 ${EXECUTABLE_OUTPUT_PATH}/VectorTest-3${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( 
VectorTest-4 ${EXECUTABLE_OUTPUT_PATH}/VectorTest-4${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( VectorTest-5 ${EXECUTABLE_OUTPUT_PATH}/VectorTest-5${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( VectorTest-6 ${EXECUTABLE_OUTPUT_PATH}/VectorTest-6${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( VectorTest-7 ${EXECUTABLE_OUTPUT_PATH}/VectorTest-7${CMAKE_EXECUTABLE_SUFFIX} ) IF( BUILD_CUDA ) ADD_TEST( ArrayTestCuda ${EXECUTABLE_OUTPUT_PATH}/ArrayTestCuda${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( ArrayViewTestCuda ${EXECUTABLE_OUTPUT_PATH}/ArrayViewTestCuda${CMAKE_EXECUTABLE_SUFFIX} ) @@ -95,6 +107,8 @@ IF( BUILD_CUDA ) ADD_TEST( VectorTestCuda-3 ${EXECUTABLE_OUTPUT_PATH}/VectorTestCuda-3${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( VectorTestCuda-4 ${EXECUTABLE_OUTPUT_PATH}/VectorTestCuda-4${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( VectorTestCuda-5 ${EXECUTABLE_OUTPUT_PATH}/VectorTestCuda-5${CMAKE_EXECUTABLE_SUFFIX} ) + ADD_TEST( VectorTestCuda-6 ${EXECUTABLE_OUTPUT_PATH}/VectorTestCuda-6${CMAKE_EXECUTABLE_SUFFIX} ) + ADD_TEST( VectorTestCuda-7 ${EXECUTABLE_OUTPUT_PATH}/VectorTestCuda-7${CMAKE_EXECUTABLE_SUFFIX} ) ENDIF() ADD_TEST( MultireductionTest ${EXECUTABLE_OUTPUT_PATH}/MultireductionTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( StaticArrayTest ${EXECUTABLE_OUTPUT_PATH}/StaticArrayTest${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Containers/VectorTest-1.h b/src/UnitTests/Containers/VectorTest-1.h index 7e7380a66..1b09e1f6b 100644 --- a/src/UnitTests/Containers/VectorTest-1.h +++ b/src/UnitTests/Containers/VectorTest-1.h @@ -1,5 +1,5 @@ /*************************************************************************** - VectorTest.h - description + VectorTest-1.h - description ------------------- begin : Oct 25, 2010 copyright : (C) 2010 by Tomas Oberhuber diff --git a/src/UnitTests/Containers/VectorTest-2.h b/src/UnitTests/Containers/VectorTest-2.h index 6c32fbd7e..91c6d7de2 100644 --- a/src/UnitTests/Containers/VectorTest-2.h +++ b/src/UnitTests/Containers/VectorTest-2.h @@ -1,5 +1,5 @@ 
/*************************************************************************** - VectorTest.h - description + VectorTest-2.h - description ------------------- begin : Oct 25, 2010 copyright : (C) 2010 by Tomas Oberhuber diff --git a/src/UnitTests/Containers/VectorTest-3.h b/src/UnitTests/Containers/VectorTest-3.h index 53c6e5ef7..a07c72a58 100644 --- a/src/UnitTests/Containers/VectorTest-3.h +++ b/src/UnitTests/Containers/VectorTest-3.h @@ -1,5 +1,5 @@ /*************************************************************************** - VectorTest.h - description + VectorTest-3.h - description ------------------- begin : Oct 25, 2010 copyright : (C) 2010 by Tomas Oberhuber diff --git a/src/UnitTests/Containers/VectorTest-4.h b/src/UnitTests/Containers/VectorTest-4.h index e683d1b2b..5a913c4e8 100644 --- a/src/UnitTests/Containers/VectorTest-4.h +++ b/src/UnitTests/Containers/VectorTest-4.h @@ -1,5 +1,5 @@ /*************************************************************************** - VectorTest.h - description + VectorTest-4.h - description ------------------- begin : Oct 25, 2010 copyright : (C) 2010 by Tomas Oberhuber diff --git a/src/UnitTests/Containers/VectorTest-5.h b/src/UnitTests/Containers/VectorTest-5.h index 6d4bc218d..e13e90422 100644 --- a/src/UnitTests/Containers/VectorTest-5.h +++ b/src/UnitTests/Containers/VectorTest-5.h @@ -1,5 +1,5 @@ /*************************************************************************** - VectorTest.h - description + VectorTest-5.h - description ------------------- begin : Oct 25, 2010 copyright : (C) 2010 by Tomas Oberhuber @@ -204,349 +204,6 @@ TYPED_TEST( VectorTest, ceil ) } -TYPED_TEST( VectorTest, acos ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - using RealType = typename VectorType::RealType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u.setElement( i, ( 
RealType )( i - size / 2 ) / ( RealType ) size ); - v.setElement( i, TNL::acos( ( RealType )( i - size / 2 ) / ( RealType ) size ) ); - } - - //EXPECT_EQ( acos( u ), v ); - for( int i = 0; i < size; i++ ) - EXPECT_NEAR( acos( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); -} - -TYPED_TEST( VectorTest, asin ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - using RealType = typename VectorType::RealType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u.setElement( i, ( RealType ) ( i - size / 2 ) / ( RealType ) size ); - v.setElement( i, TNL::asin( ( RealType )( i - size / 2 ) / ( RealType ) size ) ); - } - - //EXPECT_EQ( asin( u ), v ); - for( int i = 0; i < size; i++ ) - EXPECT_NEAR( asin( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); -} - -TYPED_TEST( VectorTest, atan ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - using RealType = typename VectorType::RealType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u.setElement( i, ( RealType ) i - ( RealType ) size / 2 ); - v.setElement( i, TNL::atan( ( RealType ) i - ( RealType ) size / 2 ) ); - } - - //EXPECT_EQ( atan( u ), v ); - for( int i = 0; i < size; i++ ) - EXPECT_NEAR( atan( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); -} - -TYPED_TEST( VectorTest, cosh ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - using RealType = typename VectorType::RealType; - const int size = VECTOR_TEST_SIZE; - - RealType h = 2.0 / ( RealType ) size; - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u.setElement( i, i * h - ( RealType ) 1.0 ); - v.setElement( i, TNL::cosh( i * h - ( 
RealType ) 1.0 ) ); - } - - // EXPECT_EQ( cosh( u ), v ) does not work here for float, maybe because - // of some fast-math optimization - for( int i = 0; i < size; i++ ) - EXPECT_NEAR( cosh( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); -} - -TYPED_TEST( VectorTest, tanh ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - using RealType = typename VectorType::RealType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u.setElement( i, ( RealType ) i - ( RealType ) size / 2 ); - v.setElement( i, TNL::tanh( ( RealType ) i - ( RealType ) size / 2 ) ); - } - - //EXPECT_EQ( tanh( u ), v ); - for( int i = 0; i < size; i++ ) - EXPECT_NEAR( tanh( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); -} - -TYPED_TEST( VectorTest, log ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - using RealType = typename VectorType::RealType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u.setElement( i, ( RealType ) i + 1 ); - v.setElement( i, TNL::log( ( RealType ) i + 1 ) ); - } - - //EXPECT_EQ( log( u ), v ); - for( int i = 0; i < size; i++ ) - EXPECT_NEAR( log( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); - -} - -TYPED_TEST( VectorTest, log10 ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - using RealType = typename VectorType::RealType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u.setElement( i, ( RealType ) i + 1 ); - v.setElement( i, TNL::log10( ( RealType ) i + 1 ) ); - } - - // EXPECT_EQ( log10( u ), v ) does not work here for float, maybe because - // of some fast-math optimization - for( int i 
= 0; i < size; i++ ) - EXPECT_NEAR( log10( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); -} - -TYPED_TEST( VectorTest, log2 ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - using RealType = typename VectorType::RealType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u.setElement( i, ( RealType ) i + 1 ); - v.setElement( i, TNL::log2( ( RealType ) i + 1 ) ); - } - - //EXPECT_EQ( log2( u ), v ); - for( int i = 0; i < size; i++ ) - EXPECT_NEAR( log2( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); - -} - -TYPED_TEST( VectorTest, exp ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - using RealType = typename VectorType::RealType; - const int size = VECTOR_TEST_SIZE; - const double h = 10.0 / size; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - const RealType x = -5.0 + i * h; - u.setElement( i, x ); - v.setElement( i, TNL::exp( x ) ); - } - - //EXPECT_EQ( exp( u ), v ); - for( int i = 0; i < size; i++ ) - EXPECT_NEAR( exp( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); -} - -TYPED_TEST( VectorTest, sign ) -{ - using VectorType = typename TestFixture::VectorType; - using ViewType = typename TestFixture::ViewType; - using RealType = typename VectorType::RealType; - const int size = VECTOR_TEST_SIZE; - - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); - for( int i = 0; i < size; i++ ) - { - u.setElement( i, ( RealType ) i - ( RealType ) size / 2 ); - v.setElement( i, TNL::sign( ( RealType ) i - ( RealType ) size / 2 ) ); - } - - //EXPECT_EQ( sign( u ), v ); - for( int i = 0; i < size; i++ ) - EXPECT_NEAR( sign( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); -} - -// TODO: test prefix sum with custom begin and end parameters - -TEST( VectorSpecialCasesTest, 
sumOfBoolVector ) -{ - using VectorType = Containers::Vector< bool, Devices::Host >; - using ViewType = VectorView< bool, Devices::Host >; - const float epsilon = 64 * std::numeric_limits< float >::epsilon(); - - VectorType v( 512 ), w( 512 ); - ViewType v_view( v ), w_view( w ); - v.setValue( true ); - w.setValue( false ); - - const int sum = v.sum< int >(); - const int l1norm = v.lpNorm< int >( 1.0 ); - const float l2norm = v.lpNorm< float >( 2.0 ); - const float l3norm = v.lpNorm< float >( 3.0 ); - EXPECT_EQ( sum, 512 ); - EXPECT_EQ( l1norm, 512 ); - EXPECT_NEAR( l2norm, std::sqrt( 512 ), epsilon ); - EXPECT_NEAR( l3norm, std::cbrt( 512 ), epsilon ); - - const int diff_sum = v.differenceSum< int >( w ); - const int diff_l1norm = v.differenceLpNorm< int >( w, 1.0 ); - const float diff_l2norm = v.differenceLpNorm< float >( w, 2.0 ); - const float diff_l3norm = v.differenceLpNorm< float >( w, 3.0 ); - EXPECT_EQ( diff_sum, 512 ); - EXPECT_EQ( diff_l1norm, 512 ); - EXPECT_NEAR( diff_l2norm, std::sqrt( 512 ), epsilon ); - EXPECT_NEAR( diff_l3norm, std::cbrt( 512 ), epsilon ); - - // test views - const int sum_view = v_view.sum< int >(); - const int l1norm_view = v_view.lpNorm< int >( 1.0 ); - const float l2norm_view = v_view.lpNorm< float >( 2.0 ); - const float l3norm_view = v_view.lpNorm< float >( 3.0 ); - EXPECT_EQ( sum_view, 512 ); - EXPECT_EQ( l1norm_view, 512 ); - EXPECT_NEAR( l2norm_view, std::sqrt( 512 ), epsilon ); - EXPECT_NEAR( l3norm_view, std::cbrt( 512 ), epsilon ); - - const int diff_sum_view = v_view.differenceSum< int >( w_view ); - const int diff_l1norm_view = v_view.differenceLpNorm< int >( w_view, 1.0 ); - const float diff_l2norm_view = v_view.differenceLpNorm< float >( w_view, 2.0 ); - const float diff_l3norm_view = v_view.differenceLpNorm< float >( w_view, 3.0 ); - EXPECT_EQ( diff_sum_view, 512 ); - EXPECT_EQ( diff_l1norm_view, 512 ); - EXPECT_NEAR( diff_l2norm_view, std::sqrt( 512 ), epsilon ); - EXPECT_NEAR( diff_l3norm_view, std::cbrt( 512 ), 
epsilon ); -} - -TEST( VectorSpecialCasesTest, assignmentThroughView ) -{ - using VectorType = Containers::Vector< int, Devices::Host >; - using ViewType = VectorView< int, Devices::Host >; - - static_assert( Algorithms::Details::HasSubscriptOperator< VectorType >::value, "Subscript operator detection by SFINAE does not work for Vector." ); - static_assert( Algorithms::Details::HasSubscriptOperator< ViewType >::value, "Subscript operator detection by SFINAE does not work for VectorView." ); - - VectorType u( 100 ), v( 100 ); - ViewType u_view( u ), v_view( v ); - - u.setValue( 42 ); - v.setValue( 0 ); - v_view = u_view; - EXPECT_EQ( u_view.getData(), u.getData() ); - EXPECT_EQ( v_view.getData(), v.getData() ); - for( int i = 0; i < 100; i++ ) - EXPECT_EQ( v_view[ i ], 42 ); - - u.setValue( 42 ); - v.setValue( 0 ); - v_view = u; - EXPECT_EQ( u_view.getData(), u.getData() ); - EXPECT_EQ( v_view.getData(), v.getData() ); - for( int i = 0; i < 100; i++ ) - EXPECT_EQ( v_view[ i ], 42 ); -} - -TEST( VectorSpecialCasesTest, operationsOnConstView ) -{ - using VectorType = Containers::Vector< int, Devices::Host >; - using ViewType = VectorView< const int, Devices::Host >; - - VectorType u( 100 ), v( 100 ); - ViewType u_view( u ), v_view( v ); - - u.setValue( 1 ); - v.setValue( 1 ); - - EXPECT_EQ( u_view.max(), 1 ); - EXPECT_EQ( u_view.min(), 1 ); - EXPECT_EQ( u_view.absMax(), 1 ); - EXPECT_EQ( u_view.absMin(), 1 ); - EXPECT_EQ( u_view.lpNorm( 1 ), 100 ); - EXPECT_EQ( u_view.differenceMax( v_view ), 0 ); - EXPECT_EQ( u_view.differenceMin( v_view ), 0 ); - EXPECT_EQ( u_view.differenceAbsMax( v_view ), 0 ); - EXPECT_EQ( u_view.differenceAbsMin( v_view ), 0 ); - EXPECT_EQ( u_view.differenceLpNorm( v_view, 1 ), 0 ); - EXPECT_EQ( u_view.differenceSum( v_view ), 0 ); - EXPECT_EQ( u_view.scalarProduct( v_view ), 100 ); -} - -TEST( VectorSpecialCasesTest, initializationOfVectorViewByArrayView ) -{ - using ArrayType = Containers::Array< int, Devices::Host >; - using VectorViewType = 
VectorView< const int, Devices::Host >; - using ArrayViewType = ArrayView< int, Devices::Host >; - - ArrayType a( 100 ); - a.setValue( 0 ); - ArrayViewType a_view( a ); - - VectorViewType v_view( a_view ); - EXPECT_EQ( v_view.getData(), a_view.getData() ); - EXPECT_EQ( v_view.sum(), 0 ); -} - -TEST( VectorSpecialCasesTest, defaultConstructors ) -{ - using ArrayType = Containers::Array< int, Devices::Host >; - using VectorViewType = VectorView< int, Devices::Host >; - using ArrayViewType = ArrayView< int, Devices::Host >; - - ArrayType a( 100 ); - a.setValue( 0 ); - - ArrayViewType a_view; - a_view.bind( a ); - - VectorViewType v_view; - v_view.bind( a ); - EXPECT_EQ( v_view.getData(), a_view.getData() ); -} - #endif // HAVE_GTEST diff --git a/src/UnitTests/Containers/VectorTest-6.cpp b/src/UnitTests/Containers/VectorTest-6.cpp new file mode 100644 index 000000000..03849218c --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-6.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + VectorTest-6.cpp - description + ------------------- + begin : Apr 30, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "VectorTest-6.h" diff --git a/src/UnitTests/Containers/VectorTest-6.cu b/src/UnitTests/Containers/VectorTest-6.cu new file mode 100644 index 000000000..400f69109 --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-6.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + VectorTest-6.cu - description + ------------------- + begin : Apr 30, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "VectorTest-6.h" diff --git 
a/src/UnitTests/Containers/VectorTest-6.h b/src/UnitTests/Containers/VectorTest-6.h new file mode 100644 index 000000000..5ab7848c3 --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-6.h @@ -0,0 +1,391 @@ +/*************************************************************************** + VectorTest-6.h - description + ------------------- + begin : Oct 25, 2010 + copyright : (C) 2010 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +// NOTE: Vector = Array + VectorOperations, so we test Vector and VectorOperations at the same time + +#pragma once + +#ifdef HAVE_GTEST +#include + +#include +#include +#include +#include "VectorTestSetup.h" + +#include "gtest/gtest.h" + +using namespace TNL; +using namespace TNL::Containers; +using namespace TNL::Containers::Algorithms; +using namespace TNL::Arithmetics; + +// should be small enough to have fast tests, but larger than minGPUReductionDataSize +// and large enough to require multiple CUDA blocks for reduction +constexpr int VECTOR_TEST_SIZE = 5000; + +TYPED_TEST( VectorTest, acos ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType )( i - size / 2 ) / ( RealType ) size ); + v.setElement( i, TNL::acos( ( RealType )( i - size / 2 ) / ( RealType ) size ) ); + } + + //EXPECT_EQ( acos( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( acos( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + +TYPED_TEST( VectorTest, asin ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = 
VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) ( i - size / 2 ) / ( RealType ) size ); + v.setElement( i, TNL::asin( ( RealType )( i - size / 2 ) / ( RealType ) size ) ); + } + + //EXPECT_EQ( asin( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( asin( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + +TYPED_TEST( VectorTest, atan ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) i - ( RealType ) size / 2 ); + v.setElement( i, TNL::atan( ( RealType ) i - ( RealType ) size / 2 ) ); + } + + //EXPECT_EQ( atan( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( atan( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + +TYPED_TEST( VectorTest, cosh ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + RealType h = 2.0 / ( RealType ) size; + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, i * h - ( RealType ) 1.0 ); + v.setElement( i, TNL::cosh( i * h - ( RealType ) 1.0 ) ); + } + + // EXPECT_EQ( cosh( u ), v ) does not work here for float, maybe because + // of some fast-math optimization + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( cosh( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + +TYPED_TEST( VectorTest, tanh ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( 
size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) i - ( RealType ) size / 2 ); + v.setElement( i, TNL::tanh( ( RealType ) i - ( RealType ) size / 2 ) ); + } + + //EXPECT_EQ( tanh( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( tanh( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + +TYPED_TEST( VectorTest, log ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) i + 1 ); + v.setElement( i, TNL::log( ( RealType ) i + 1 ) ); + } + + //EXPECT_EQ( log( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( log( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); + +} + +TYPED_TEST( VectorTest, log10 ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) i + 1 ); + v.setElement( i, TNL::log10( ( RealType ) i + 1 ) ); + } + + // EXPECT_EQ( log10( u ), v ) does not work here for float, maybe because + // of some fast-math optimization + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( log10( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + +TYPED_TEST( VectorTest, log2 ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) i + 1 ); + v.setElement( i, 
TNL::log2( ( RealType ) i + 1 ) ); + } + + //EXPECT_EQ( log2( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( log2( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); + +} + +TYPED_TEST( VectorTest, exp ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + const double h = 10.0 / size; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + const RealType x = -5.0 + i * h; + u.setElement( i, x ); + v.setElement( i, TNL::exp( x ) ); + } + + //EXPECT_EQ( exp( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( exp( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + +TYPED_TEST( VectorTest, sign ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType ) i - ( RealType ) size / 2 ); + v.setElement( i, TNL::sign( ( RealType ) i - ( RealType ) size / 2 ) ); + } + + //EXPECT_EQ( sign( u ), v ); + for( int i = 0; i < size; i++ ) + EXPECT_NEAR( sign( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + +// TODO: test prefix sum with custom begin and end parameters + +TEST( VectorSpecialCasesTest, sumOfBoolVector ) +{ + using VectorType = Containers::Vector< bool, Devices::Host >; + using ViewType = VectorView< bool, Devices::Host >; + const float epsilon = 64 * std::numeric_limits< float >::epsilon(); + + VectorType v( 512 ), w( 512 ); + ViewType v_view( v ), w_view( w ); + v.setValue( true ); + w.setValue( false ); + + const int sum = v.sum< int >(); + const int l1norm = v.lpNorm< int >( 1.0 ); + const float l2norm = v.lpNorm< float >( 2.0 ); + const float l3norm = v.lpNorm< float >( 
3.0 ); + EXPECT_EQ( sum, 512 ); + EXPECT_EQ( l1norm, 512 ); + EXPECT_NEAR( l2norm, std::sqrt( 512 ), epsilon ); + EXPECT_NEAR( l3norm, std::cbrt( 512 ), epsilon ); + + const int diff_sum = v.differenceSum< int >( w ); + const int diff_l1norm = v.differenceLpNorm< int >( w, 1.0 ); + const float diff_l2norm = v.differenceLpNorm< float >( w, 2.0 ); + const float diff_l3norm = v.differenceLpNorm< float >( w, 3.0 ); + EXPECT_EQ( diff_sum, 512 ); + EXPECT_EQ( diff_l1norm, 512 ); + EXPECT_NEAR( diff_l2norm, std::sqrt( 512 ), epsilon ); + EXPECT_NEAR( diff_l3norm, std::cbrt( 512 ), epsilon ); + + // test views + const int sum_view = v_view.sum< int >(); + const int l1norm_view = v_view.lpNorm< int >( 1.0 ); + const float l2norm_view = v_view.lpNorm< float >( 2.0 ); + const float l3norm_view = v_view.lpNorm< float >( 3.0 ); + EXPECT_EQ( sum_view, 512 ); + EXPECT_EQ( l1norm_view, 512 ); + EXPECT_NEAR( l2norm_view, std::sqrt( 512 ), epsilon ); + EXPECT_NEAR( l3norm_view, std::cbrt( 512 ), epsilon ); + + const int diff_sum_view = v_view.differenceSum< int >( w_view ); + const int diff_l1norm_view = v_view.differenceLpNorm< int >( w_view, 1.0 ); + const float diff_l2norm_view = v_view.differenceLpNorm< float >( w_view, 2.0 ); + const float diff_l3norm_view = v_view.differenceLpNorm< float >( w_view, 3.0 ); + EXPECT_EQ( diff_sum_view, 512 ); + EXPECT_EQ( diff_l1norm_view, 512 ); + EXPECT_NEAR( diff_l2norm_view, std::sqrt( 512 ), epsilon ); + EXPECT_NEAR( diff_l3norm_view, std::cbrt( 512 ), epsilon ); +} + +TEST( VectorSpecialCasesTest, assignmentThroughView ) +{ + using VectorType = Containers::Vector< int, Devices::Host >; + using ViewType = VectorView< int, Devices::Host >; + + static_assert( Algorithms::Details::HasSubscriptOperator< VectorType >::value, "Subscript operator detection by SFINAE does not work for Vector." ); + static_assert( Algorithms::Details::HasSubscriptOperator< ViewType >::value, "Subscript operator detection by SFINAE does not work for VectorView." 
); + + VectorType u( 100 ), v( 100 ); + ViewType u_view( u ), v_view( v ); + + u.setValue( 42 ); + v.setValue( 0 ); + v_view = u_view; + EXPECT_EQ( u_view.getData(), u.getData() ); + EXPECT_EQ( v_view.getData(), v.getData() ); + for( int i = 0; i < 100; i++ ) + EXPECT_EQ( v_view[ i ], 42 ); + + u.setValue( 42 ); + v.setValue( 0 ); + v_view = u; + EXPECT_EQ( u_view.getData(), u.getData() ); + EXPECT_EQ( v_view.getData(), v.getData() ); + for( int i = 0; i < 100; i++ ) + EXPECT_EQ( v_view[ i ], 42 ); +} + +TEST( VectorSpecialCasesTest, operationsOnConstView ) +{ + using VectorType = Containers::Vector< int, Devices::Host >; + using ViewType = VectorView< const int, Devices::Host >; + + VectorType u( 100 ), v( 100 ); + ViewType u_view( u ), v_view( v ); + + u.setValue( 1 ); + v.setValue( 1 ); + + EXPECT_EQ( u_view.max(), 1 ); + EXPECT_EQ( u_view.min(), 1 ); + EXPECT_EQ( u_view.absMax(), 1 ); + EXPECT_EQ( u_view.absMin(), 1 ); + EXPECT_EQ( u_view.lpNorm( 1 ), 100 ); + EXPECT_EQ( u_view.differenceMax( v_view ), 0 ); + EXPECT_EQ( u_view.differenceMin( v_view ), 0 ); + EXPECT_EQ( u_view.differenceAbsMax( v_view ), 0 ); + EXPECT_EQ( u_view.differenceAbsMin( v_view ), 0 ); + EXPECT_EQ( u_view.differenceLpNorm( v_view, 1 ), 0 ); + EXPECT_EQ( u_view.differenceSum( v_view ), 0 ); + EXPECT_EQ( u_view.scalarProduct( v_view ), 100 ); +} + +TEST( VectorSpecialCasesTest, initializationOfVectorViewByArrayView ) +{ + using ArrayType = Containers::Array< int, Devices::Host >; + using VectorViewType = VectorView< const int, Devices::Host >; + using ArrayViewType = ArrayView< int, Devices::Host >; + + ArrayType a( 100 ); + a.setValue( 0 ); + ArrayViewType a_view( a ); + + VectorViewType v_view( a_view ); + EXPECT_EQ( v_view.getData(), a_view.getData() ); + EXPECT_EQ( v_view.sum(), 0 ); +} + +TEST( VectorSpecialCasesTest, defaultConstructors ) +{ + using ArrayType = Containers::Array< int, Devices::Host >; + using VectorViewType = VectorView< int, Devices::Host >; + using ArrayViewType = 
ArrayView< int, Devices::Host >; + + ArrayType a( 100 ); + a.setValue( 0 ); + + ArrayViewType a_view; + a_view.bind( a ); + + VectorViewType v_view; + v_view.bind( a ); + EXPECT_EQ( v_view.getData(), a_view.getData() ); +} + +#endif // HAVE_GTEST + + +#include "../GtestMissingError.h" +int main( int argc, char* argv[] ) +{ + //Test(); + //return 0; +#ifdef HAVE_GTEST + ::testing::InitGoogleTest( &argc, argv ); + return RUN_ALL_TESTS(); +#else + throw GtestMissingError(); +#endif +} diff --git a/src/UnitTests/Containers/VectorTest-7.cpp b/src/UnitTests/Containers/VectorTest-7.cpp new file mode 100644 index 000000000..8db62ba86 --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-7.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + VectorTest-7.cpp - description + ------------------- + begin : Apr 30, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "VectorTest-7.h" diff --git a/src/UnitTests/Containers/VectorTest-7.cu b/src/UnitTests/Containers/VectorTest-7.cu new file mode 100644 index 000000000..303dcd1b0 --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-7.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + VectorTest-7.cu - description + ------------------- + begin : Apr 30, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "VectorTest-7.h" diff --git a/src/UnitTests/Containers/VectorTest-7.h b/src/UnitTests/Containers/VectorTest-7.h new file mode 100644 index 000000000..02f2a00b2 --- /dev/null +++ b/src/UnitTests/Containers/VectorTest-7.h @@ -0,0 +1,68 @@ 
+/*************************************************************************** + VectorTest-7.h - description + ------------------- + begin : Oct 25, 2010 + copyright : (C) 2010 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +// NOTE: Vector = Array + VectorOperations, so we test Vector and VectorOperations at the same time + +#pragma once + +#ifdef HAVE_GTEST +#include + +#include +#include +#include +#include "VectorTestSetup.h" + +#include "gtest/gtest.h" + +using namespace TNL; +using namespace TNL::Containers; +using namespace TNL::Containers::Algorithms; +using namespace TNL::Arithmetics; + +// should be small enough to have fast tests, but larger than minGPUReductionDataSize +// and large enough to require multiple CUDA blocks for reduction +constexpr int VECTOR_TEST_SIZE = 500; + +TYPED_TEST( VectorTest, min ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ); + ViewType u( _u ); + for( int i = 0; i < size; i++ ) + { + u.setElement( i, ( RealType )( i - size / 2 ) / ( RealType ) size ); + } + + EXPECT_EQ( min( u ), u.getElement( 0 ) ); + //for( int i = 0; i < size; i++ ) + // EXPECT_NEAR( acos( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); +} + + +#endif // HAVE_GTEST + + +#include "../GtestMissingError.h" +int main( int argc, char* argv[] ) +{ + //Test(); + //return 0; +#ifdef HAVE_GTEST + ::testing::InitGoogleTest( &argc, argv ); + return RUN_ALL_TESTS(); +#else + throw GtestMissingError(); +#endif +} -- GitLab From 458d921794f4fb7f75b9be68a3a6c5e132624a5a Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Fri, 3 May 2019 14:49:43 +0200 Subject: [PATCH 27/93] Vertical operations are almost working. 
--- .../Expressions/ExpressionTemplates.h | 1286 ++++++++-------- .../Expressions/StaticExpressionTemplates.h | 1312 +++++++++-------- .../Expressions/VerticalOperations.h | 204 ++- src/TNL/Containers/StaticVector.h | 2 - src/TNL/Containers/StaticVectorExpressions.h | 399 +++-- src/TNL/Containers/VectorViewExpressions.h | 410 ++++-- .../DistributedMeshes/DistributedGrid.hpp | 2 +- src/UnitTests/Containers/ArrayViewTest.h | 5 +- src/UnitTests/Containers/StaticVectorTest.cpp | 112 +- src/UnitTests/Containers/VectorTest-4.h | 2 +- src/UnitTests/Containers/VectorTest-7.h | 39 +- 11 files changed, 2060 insertions(+), 1713 deletions(-) diff --git a/src/TNL/Containers/Expressions/ExpressionTemplates.h b/src/TNL/Containers/Expressions/ExpressionTemplates.h index 9ea979766..280b2817e 100644 --- a/src/TNL/Containers/Expressions/ExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/ExpressionTemplates.h @@ -80,8 +80,10 @@ struct BinaryExpressionTemplate< T1, T2, Operation, VectorVariable, VectorVariab } protected: - typename OperandType< T1, DeviceType >::type op1; - typename OperandType< T2, DeviceType >::type op2; + const T1 op1; + const T2 op2; + //typename OperandType< T1, DeviceType >::type op1; + //typename OperandType< T2, DeviceType >::type op2; }; template< typename T1, @@ -262,6 +264,12 @@ struct UnaryExpressionTemplate< T1, Operation, void, VectorVariable > //typename std::add_const< typename OperandType< T1, DeviceType >::type >::type operand; }; + } //namespace Expressions + } //namespace Containers + +//// +// All operations are supposed to be in namespace TNL + //// // Binary expressions addition template< typename L1, @@ -271,34 +279,34 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Addition > -operator + ( const 
Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Addition > +operator + ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Addition >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Addition >( a, b ); } template< typename T1, typename T2, template< typename, typename > class Operation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Addition > -operator + ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Addition > +operator + ( const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return 
Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Addition >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Addition >( a, b ); } template< typename L1, @@ -307,17 +315,17 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Addition > -operator + ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Addition > +operator + ( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Addition >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Addition >( a, b ); } template< typename L1, @@ -326,17 +334,17 @@ template< typename L1, typename R1, 
template< typename > class ROperation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::UnaryExpressionTemplate< R1, ROperation >, - Expressions::Addition > -operator + ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Addition > +operator + ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::UnaryExpressionTemplate< R1, ROperation >, - Expressions::Addition >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Addition >( a, b ); } //// @@ -348,34 +356,34 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Subtraction > -operator - ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + 
Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Subtraction > +operator - ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Subtraction >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Subtraction >( a, b ); } template< typename T1, typename T2, template< typename, typename > class Operation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Subtraction > -operator - ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Subtraction > +operator - ( const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Subtraction >( a, b ); + return 
Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Subtraction >( a, b ); } template< typename L1, @@ -384,17 +392,17 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Subtraction > -operator - ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Subtraction > +operator - ( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Subtraction >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Subtraction >( a, b ); } template< typename L1, @@ -403,17 +411,17 @@ template< typename L1, typename R1, template< typename > class ROperation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - typename 
Expressions::UnaryExpressionTemplate< R1, ROperation >, - Expressions::Subtraction > -operator - ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Subtraction > +operator - ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::UnaryExpressionTemplate< R1, ROperation >, - Expressions::Subtraction >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Subtraction >( a, b ); } //// @@ -425,34 +433,34 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Multiplication > -operator * ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Multiplication > +operator * ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation 
>& a, + const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Multiplication >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Multiplication >( a, b ); } template< typename T1, typename T2, template< typename, typename > class Operation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Multiplication > -operator * ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Multiplication > +operator * ( const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Multiplication >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, 
+ Containers::Expressions::Multiplication >( a, b ); } template< typename L1, @@ -461,17 +469,17 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Multiplication > -operator * ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Multiplication > +operator * ( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Multiplication >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Multiplication >( a, b ); } template< typename L1, @@ -480,17 +488,17 @@ template< typename L1, typename R1, template< typename > class ROperation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::UnaryExpressionTemplate< R1, ROperation >, - Expressions::Multiplication > -operator * ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::UnaryExpressionTemplate< R1, 
ROperation >& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Multiplication > +operator * ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::UnaryExpressionTemplate< R1, ROperation >, - Expressions::Multiplication >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Multiplication >( a, b ); } //// @@ -502,34 +510,34 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Division > -operator / ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Division > +operator / ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - 
Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Division >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Division >( a, b ); } template< typename T1, typename T2, template< typename, typename > class Operation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Division > -operator / ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Division > +operator / ( const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Division >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Division >( a, b ); } template< typename L1, @@ -538,17 +546,17 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const 
Expressions::BinaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Division > -operator / ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Division > +operator / ( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Division >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Division >( a, b ); } template< typename L1, @@ -557,17 +565,17 @@ template< typename L1, typename R1, template< typename > class ROperation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::UnaryExpressionTemplate< R1, ROperation >, - Expressions::Division > -operator / ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >, + 
Containers::Expressions::Division > +operator / ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::UnaryExpressionTemplate< R1, ROperation >, - Expressions::Division >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Division >( a, b ); } //// @@ -579,34 +587,34 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Min > -min ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Min > +min ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Min >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation 
>, + Containers::Expressions::Min >( a, b ); } template< typename T1, typename T2, template< typename, typename > class Operation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Min > -min( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Min > +min( const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Min >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Min >( a, b ); } template< typename L1, @@ -615,17 +623,17 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Min > -min( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +const 
Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Min > +min( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Min >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Min >( a, b ); } template< typename L1, @@ -634,17 +642,17 @@ template< typename L1, typename R1, template< typename > class ROperation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::UnaryExpressionTemplate< R1, ROperation >, - Expressions::Min > -min( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Min > +min( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::UnaryExpressionTemplate< R1, ROperation >, - Expressions::Min >( a, b 
); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Min >( a, b ); } //// @@ -656,34 +664,34 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Max > -max( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Max > +max( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Max >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Max >( a, b ); } template< typename T1, typename T2, template< typename, typename > class Operation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Max > -max( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - 
const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Max > +max( const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Max >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Max >( a, b ); } template< typename L1, @@ -692,17 +700,17 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Max > -max( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Max > +max( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return 
Expressions::BinaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Max >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Max >( a, b ); } template< typename L1, @@ -711,17 +719,17 @@ template< typename L1, typename R1, template< typename > class ROperation > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::UnaryExpressionTemplate< R1, ROperation >, - Expressions::Max > -max( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) +const Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Max > +max( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) { - return Expressions::BinaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::UnaryExpressionTemplate< R1, ROperation >, - Expressions::Max >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Max >( a, b ); } //// @@ -734,10 +742,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator == ( const 
Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator == ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::ComparisonEQ( a, b ); + return Containers::Expressions::ComparisonEQ( a, b ); } template< typename T1, @@ -745,10 +753,10 @@ template< typename T1, template< typename, typename > class Operation > __cuda_callable__ bool -operator == ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +operator == ( const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::ComparisonEQ( a, b ); + return Containers::Expressions::ComparisonEQ( a, b ); } template< typename L1, @@ -758,10 +766,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator == ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator == ( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::ComparisonEQ( a, b ); + return Containers::Expressions::ComparisonEQ( a, b ); } template< typename L1, @@ -771,10 +779,10 @@ template< typename L1, template< typename > class ROperation > __cuda_callable__ bool -operator == ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) +operator == ( const Containers::Expressions::BinaryExpressionTemplate< 
L1, L2, LOperation >& a, + const typename Containers::Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) { - return Expressions::ComparisonEQ( a, b ); + return Containers::Expressions::ComparisonEQ( a, b ); } //// @@ -787,10 +795,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator != ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator != ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::ComparisonNE( a, b ); + return Containers::Expressions::ComparisonNE( a, b ); } template< typename T1, @@ -798,10 +806,10 @@ template< typename T1, template< typename, typename > class Operation > __cuda_callable__ bool -operator != ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +operator != ( const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::ComparisonNE( a, b ); + return Containers::Expressions::ComparisonNE( a, b ); } template< typename L1, @@ -811,10 +819,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator != ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator != ( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::ComparisonNE( a, b ); + return Containers::Expressions::ComparisonNE( a, b ); } 
template< typename L1, @@ -824,10 +832,10 @@ template< typename L1, template< typename > class ROperation > __cuda_callable__ bool -operator != ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) +operator != ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) { - return Expressions::ComparisonNE( a, b ); + return Containers::Expressions::ComparisonNE( a, b ); } //// @@ -840,10 +848,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator < ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator < ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::ComparisonLT( a, b ); + return Containers::Expressions::ComparisonLT( a, b ); } template< typename T1, @@ -851,10 +859,10 @@ template< typename T1, template< typename, typename > class Operation > __cuda_callable__ bool -operator < ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +operator < ( const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::ComparisonLT( a, b ); + return Containers::Expressions::ComparisonLT( a, b ); } template< typename L1, @@ -864,10 +872,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator < ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, - const 
typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator < ( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::ComparisonLT( a, b ); + return Containers::Expressions::ComparisonLT( a, b ); } template< typename L1, @@ -877,10 +885,10 @@ template< typename L1, template< typename > class ROperation > __cuda_callable__ bool -operator < ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) +operator < ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) { - return Expressions::ComparisonLT( a, b ); + return Containers::Expressions::ComparisonLT( a, b ); } //// @@ -893,10 +901,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator <= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator <= ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::ComparisonLE( a, b ); + return Containers::Expressions::ComparisonLE( a, b ); } template< typename T1, @@ -904,10 +912,10 @@ template< typename T1, template< typename, typename > class Operation > __cuda_callable__ bool -operator <= ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +operator <= ( const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, 
Operation >::RealType& b ) { - return Expressions::ComparisonLE( a, b ); + return Containers::Expressions::ComparisonLE( a, b ); } template< typename L1, @@ -917,10 +925,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator <= ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator <= ( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::ComparisonLE( a, b ); + return Containers::Expressions::ComparisonLE( a, b ); } template< typename L1, @@ -930,10 +938,10 @@ template< typename L1, template< typename > class ROperation > __cuda_callable__ bool -operator <= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) +operator <= ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) { - return Expressions::ComparisonLE( a, b ); + return Containers::Expressions::ComparisonLE( a, b ); } //// @@ -946,10 +954,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator > ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator > ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::ComparisonGT( a, b ); + return Containers::Expressions::ComparisonGT( a, b ); } template< typename T1, @@ -957,10 +965,10 @@ template< typename T1, template< typename, typename > class Operation > 
__cuda_callable__ bool -operator > ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +operator > ( const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::ComparisonGT( a, b ); + return Containers::Expressions::ComparisonGT( a, b ); } template< typename L1, @@ -970,10 +978,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator > ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator > ( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::ComparisonGT( a, b ); + return Containers::Expressions::ComparisonGT( a, b ); } template< typename L1, @@ -983,10 +991,10 @@ template< typename L1, template< typename > class ROperation > __cuda_callable__ bool -operator > ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) +operator > ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) { - return Expressions::ComparisonGT( a, b ); + return Containers::Expressions::ComparisonGT( a, b ); } //// @@ -999,10 +1007,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator >= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator >= ( const 
Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::ComparisonGE( a, b ); + return Containers::Expressions::ComparisonGE( a, b ); } template< typename T1, @@ -1010,10 +1018,10 @@ template< typename T1, template< typename, typename > class Operation > __cuda_callable__ bool -operator >= ( const Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +operator >= ( const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::ComparisonGE( a, b ); + return Containers::Expressions::ComparisonGE( a, b ); } template< typename L1, @@ -1023,10 +1031,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator >= ( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator >= ( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::ComparisonGE( a, b ); + return Containers::Expressions::ComparisonGE( a, b ); } template< typename L1, @@ -1036,10 +1044,10 @@ template< typename L1, template< typename > class ROperation > __cuda_callable__ bool -operator >= ( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) +operator >= ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) { - return Expressions::ComparisonGE( 
a, b ); + return Containers::Expressions::ComparisonGE( a, b ); } //// @@ -1052,27 +1060,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Minus > -operator -( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Minus > +operator -( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Minus >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Minus >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Abs > -operator -( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Abs > +operator -( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Minus >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Minus >( a ); } //// @@ -1081,27 +1089,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const 
Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Abs > -abs( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Abs > +abs( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Abs >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Abs >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Abs > -abs( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Abs > +abs( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Abs >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Abs >( a ); } //// @@ -1110,27 +1118,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Sin > -sin( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + 
Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Sin > +sin( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Sin >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Sin >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Sin > -sin( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Sin > +sin( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Sin >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Sin >( a ); } //// @@ -1139,27 +1147,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Cos > -cos( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Cos > +cos( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - 
Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Cos >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Cos >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Cos > -cos( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Cos > +cos( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Cos >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Cos >( a ); } //// @@ -1168,27 +1176,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Tan > -tan( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Tan > +tan( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Tan >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Tan >( a ); } 
template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Tan > -tan( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Tan > +tan( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Tan >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Tan >( a ); } //// @@ -1197,27 +1205,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Sqrt > -sqrt( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Sqrt > +sqrt( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Sqrt >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Sqrt >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Sqrt > -sqrt( const Expressions::UnaryExpressionTemplate< L1, 
LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Sqrt > +sqrt( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Sqrt >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Sqrt >( a ); } //// @@ -1226,27 +1234,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Cbrt > -cbrt( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Cbrt > +cbrt( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Cbrt >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Cbrt >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Cbrt > -cbrt( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Cbrt > +cbrt( const Containers::Expressions::UnaryExpressionTemplate< L1, 
LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Cbrt >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Cbrt >( a ); } //// @@ -1256,14 +1264,14 @@ template< typename L1, template< typename, typename > class LOperation, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Pow > -pow( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Real& exp ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Pow > +pow( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Real& exp ) { - auto e = Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Pow >( a ); + auto e = Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Pow >( a ); e.parameter.set( exp ); return e; } @@ -1272,14 +1280,14 @@ template< typename L1, template< typename > class LOperation, typename Real > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Pow > -pow( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a, const Real& exp ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Pow > +pow( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, const Real& exp ) { - auto e = Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, 
LOperation >, - Expressions::Pow >( a ); + auto e = Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Pow >( a ); e.parameter.set( exp ); return e; } @@ -1290,27 +1298,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Sin > -floor( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Sin > +floor( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Floor >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Floor >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Floor > -floor( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Floor > +floor( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Floor >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Floor >( a ); } //// 
@@ -1319,27 +1327,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Ceil > -ceil( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Ceil > +ceil( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Ceil >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Ceil >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Ceil > -sin( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Ceil > +sin( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Ceil >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Ceil >( a ); } //// @@ -1348,27 +1356,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Asin > 
-asin( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Asin > +asin( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Asin >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Asin >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Asin > -asin( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Asin > +asin( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Asin >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Asin >( a ); } //// @@ -1377,27 +1385,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Acos > -cos( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Acos > +cos( const 
Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Acos >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Acos >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Acos > -acos( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Acos > +acos( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Cos >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Cos >( a ); } //// @@ -1406,27 +1414,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Atan > -tan( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Atan > +tan( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Atan >( a ); + return 
Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Atan >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Atan > -atan( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Atan > +atan( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Atan >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Atan >( a ); } //// @@ -1435,27 +1443,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Sinh > -sinh( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Sinh > +sinh( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Sinh >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Sinh >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const 
Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Sinh > -sinh( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Sinh > +sinh( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Sinh >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Sinh >( a ); } //// @@ -1464,27 +1472,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Cosh > -cosh( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Cosh > +cosh( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Cosh >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Cosh >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Cosh > -cosh( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + 
Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Cosh > +cosh( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Cosh >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Cosh >( a ); } //// @@ -1493,27 +1501,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Tanh > -cosh( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Tanh > +cosh( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Tanh >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Tanh >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Tanh > -tanh( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Tanh > +tanh( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - 
Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Tanh >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Tanh >( a ); } //// @@ -1522,27 +1530,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Log > -log( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Log > +log( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Log >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Log >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Log > -log( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Log > +log( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Log >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Log >( a ); } //// @@ 
-1551,27 +1559,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Log10 > -log10( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Log10 > +log10( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Log10 >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Log10 >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Log10 > -log10( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Log10 > +log10( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Log10 >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Log10 >( a ); } //// @@ -1580,27 +1588,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - 
Expressions::Log2 > -log2( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Log2 > +log2( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Log2 >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Log2 >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Log2 > -log2( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Log2 > +log2( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Log2 >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Log2 >( a ); } //// @@ -1609,27 +1617,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Exp > -exp( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + 
Containers::Expressions::Exp > +exp( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Exp >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Exp >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Exp > -exp( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Exp > +exp( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::UnaryExpressionTemplate< - Expressions::UnaryExpressionTemplate< L1, LOperation >, - Expressions::Exp >( a ); + return Containers::Expressions::UnaryExpressionTemplate< + Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Exp >( a ); } //// @@ -1637,9 +1645,8 @@ exp( const Expressions::UnaryExpressionTemplate< L1, LOperation >& a ) template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ -typename Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType -min( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +typename Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType +min( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { return ExpressionMin( a ); } @@ -1647,9 +1654,8 @@ min( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) template< typename L1, template< typename > class LOperation, typename 
Parameter > -__cuda_callable__ -typename Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >::RealType -min( const Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +typename Containers::Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +min( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a ) { return ExpressionMin( a ); } @@ -1657,9 +1663,8 @@ min( const Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ -typename Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType -max( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +typename Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType +max( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { return ExpressionMax( a ); } @@ -1667,9 +1672,8 @@ max( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) template< typename L1, template< typename > class LOperation, typename Parameter > -__cuda_callable__ -typename Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >::RealType -max( const Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +typename Containers::Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +max( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a ) { return ExpressionMax( a ); } @@ -1677,9 +1681,8 @@ max( const Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ -typename Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType -sum( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +typename 
Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType +sum( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { return ExpressionSum( a ); } @@ -1687,19 +1690,37 @@ sum( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) template< typename L1, template< typename > class LOperation, typename Parameter > -__cuda_callable__ -typename Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >::RealType -sum( const Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +typename Containers::Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +sum( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a ) { return ExpressionSum( a ); } +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename Real > +typename Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType +lpNorm( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Real& p ) +{ + return ExpressionLpNorm( a, p ); +} + +template< typename L1, + template< typename > class LOperation, + typename Parameter, + typename Real > +typename Containers::Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +lpNorm( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a, const Real& p ) +{ + return ExpressionLpNorm( a, p ); +} + template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ -typename Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType -product( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +typename Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType +product( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { return ExpressionProduct( a ); } @@ 
-1707,9 +1728,8 @@ product( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) template< typename L1, template< typename > class LOperation, typename Parameter > -__cuda_callable__ -typename Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >::RealType -product( const Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +typename Containers::Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +product( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a ) { return ExpressionProduct( a ); } @@ -1717,9 +1737,8 @@ product( const Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter > template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ -typename Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType -logicalOr( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +bool +logicalOr( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { return ExpressionLogicalOr( a ); } @@ -1727,19 +1746,17 @@ logicalOr( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a template< typename L1, template< typename > class LOperation, typename Parameter > -__cuda_callable__ -typename Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >::RealType -logicalOr( const Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +bool +logicalAnd( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a ) { - return ExpressionLogicalOr( a ); + return ExpressionLogicalAnd( a ); } template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ -typename Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType -binaryOr( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) +typename 
Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >::RealType +binaryOr( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) { return ExpressionBinaryOr( a ); } @@ -1747,11 +1764,10 @@ binaryOr( const Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a ) template< typename L1, template< typename > class LOperation, typename Parameter > -__cuda_callable__ -typename Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >::RealType -binaryOr( const Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +typename Containers::Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +binaryAnd( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter >& a ) { - return ExpressionBinaryOr( a ); + return ExpressionBinaryAnd( a ); } //// @@ -1759,7 +1775,7 @@ binaryOr( const Expressions::UnaryExpressionTemplate< L1, LOperation, Parameter template< typename T1, typename T2, template< typename, typename > class Operation > -std::ostream& operator << ( std::ostream& str, const BinaryExpressionTemplate< T1, T2, Operation >& expression ) +std::ostream& operator << ( std::ostream& str, const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& expression ) { str << "[ "; for( int i = 0; i < expression.getSize() - 1; i++ ) @@ -1771,7 +1787,7 @@ std::ostream& operator << ( std::ostream& str, const BinaryExpressionTemplate< T template< typename T, template< typename > class Operation, typename Parameter > -std::ostream& operator << ( std::ostream& str, const UnaryExpressionTemplate< T, Operation, Parameter >& expression ) +std::ostream& operator << ( std::ostream& str, const Containers::Expressions::UnaryExpressionTemplate< T, Operation, Parameter >& expression ) { str << "[ "; for( int i = 0; i < expression.getSize() - 1; i++ ) @@ -1779,6 +1795,4 @@ std::ostream& operator << ( std::ostream& str, const UnaryExpressionTemplate< T, str 
<< expression.getElement( expression.getSize() - 1 ) << " ]"; return str; } - } //namespace Expressions - } //namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h index 7292c733b..a1d6ae639 100644 --- a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h @@ -279,6 +279,37 @@ struct StaticUnaryExpressionTemplate< T1, Operation, void, VectorVariable > const T1& operand; }; +//// +// Output stream +template< typename T1, + typename T2, + template< typename, typename > class Operation > +std::ostream& operator << ( std::ostream& str, const Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& expression ) +{ + str << "[ "; + for( int i = 0; i < expression.getSize() - 1; i++ ) + str << expression[ i ] << ", "; + str << expression[ expression.getSize() - 1 ] << " ]"; + return str; +} + +template< typename T, + template< typename > class Operation, + typename Parameter > +std::ostream& operator << ( std::ostream& str, const Containers::Expressions::StaticUnaryExpressionTemplate< T, Operation, Parameter >& expression ) +{ + str << "[ "; + for( int i = 0; i < expression.getSize() - 1; i++ ) + str << expression[ i ] << ", "; + str << expression[ expression.getSize() - 1 ] << " ]"; + return str; +} + } //namespace Expressions + } //namespace Containers + +//// +// All operations are supposed to be in namespace TNL + //// // Binary expressions addition template< typename L1, @@ -288,34 +319,34 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Addition > -operator + ( const 
Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Addition > +operator + ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Addition >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Addition >( a, b ); } template< typename T1, typename T2, template< typename, typename > class Operation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Addition > -operator + ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Addition > +operator + ( const Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, 
Operation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Addition >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Addition >( a, b ); } template< typename L1, @@ -324,17 +355,17 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Addition > -operator + ( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Addition > +operator + ( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Addition >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + 
Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Addition >( a, b ); } template< typename L1, @@ -343,17 +374,17 @@ template< typename L1, typename R1, template< typename > class ROperation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, - Expressions::Addition > -operator + ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Addition > +operator + ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, - Expressions::Addition >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Addition >( a, b ); } //// @@ -365,34 +396,34 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - 
Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Subtraction > -operator - ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Subtraction > +operator - ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Subtraction >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Subtraction >( a, b ); } template< typename T1, typename T2, template< typename, typename > class Operation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Subtraction > -operator - ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation 
>::RealType, + Containers::Expressions::Subtraction > +operator - ( const Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Subtraction >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Subtraction >( a, b ); } template< typename L1, @@ -401,17 +432,17 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Subtraction > -operator - ( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Subtraction > +operator - ( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - typename Expressions::StaticBinaryExpressionTemplate< 
R1, R2, ROperation >, - Expressions::Subtraction >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Subtraction >( a, b ); } template< typename L1, @@ -420,17 +451,17 @@ template< typename L1, typename R1, template< typename > class ROperation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, - Expressions::Subtraction > -operator - ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Subtraction > +operator - ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, - Expressions::Subtraction >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Subtraction >( a, b ); } //// @@ -442,34 +473,34 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ 
-const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Multiplication > -operator * ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Multiplication > +operator * ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Multiplication >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Multiplication >( a, b ); } template< typename T1, typename T2, template< typename, typename > class Operation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Multiplication > -operator * ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + 
Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Multiplication > +operator * ( const Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Multiplication >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Multiplication >( a, b ); } template< typename L1, @@ -478,17 +509,17 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Multiplication > -operator * ( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Multiplication > +operator * ( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b 
) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Multiplication >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Multiplication >( a, b ); } template< typename L1, @@ -497,17 +528,17 @@ template< typename L1, typename R1, template< typename > class ROperation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, - Expressions::Multiplication > -operator * ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Multiplication > +operator * ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::StaticUnaryExpressionTemplate< R1, ROperation >& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, - Expressions::Multiplication >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::StaticUnaryExpressionTemplate< R1, 
ROperation >, + Containers::Expressions::Multiplication >( a, b ); } //// @@ -519,34 +550,34 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Division > -operator / ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Division > +operator / ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Division >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Division >( a, b ); } template< typename T1, typename T2, template< typename, typename > class Operation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Division > -operator / ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, - const typename 
Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Division > +operator / ( const Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Division >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Division >( a, b ); } template< typename L1, @@ -555,17 +586,17 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Division > -operator / ( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Division > +operator / ( const Containers::Expressions::StaticUnaryExpressionTemplate< 
L1, LOperation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Division >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Division >( a, b ); } template< typename L1, @@ -574,17 +605,17 @@ template< typename L1, typename R1, template< typename > class ROperation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, - Expressions::Division > -operator / ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Division > +operator / ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, - Expressions::Division >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, 
LOperation >, + typename Containers::Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Division >( a, b ); } //// @@ -596,34 +627,34 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Min > -min ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Min > +min ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Min >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Min >( a, b ); } template< typename T1, typename T2, template< typename, typename > class Operation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Min > -min( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation 
>& a, - const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Min > +min( const Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Min >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Min >( a, b ); } template< typename L1, @@ -632,17 +663,17 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Min > -min( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Min > +min( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const 
typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Min >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Min >( a, b ); } template< typename L1, @@ -651,17 +682,17 @@ template< typename L1, typename R1, template< typename > class ROperation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, - Expressions::Min > -min( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Min > +min( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, - Expressions::Min >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename 
Containers::Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Min >( a, b ); } //// @@ -673,34 +704,34 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Max > -max( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Max > +max( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Max >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Max >( a, b ); } template< typename T1, typename T2, template< typename, typename > class Operation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Max > -max( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, - const typename 
Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Max > +max( const Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, - typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, - Expressions::Max >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType, + Containers::Expressions::Max >( a, b ); } template< typename L1, @@ -709,17 +740,17 @@ template< typename L1, typename R2, template< typename, typename > class ROperation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Max > -max( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Max > +max( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename 
Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, - Expressions::Max >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >, + Containers::Expressions::Max >( a, b ); } template< typename L1, @@ -728,17 +759,17 @@ template< typename L1, typename R1, template< typename > class ROperation > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, - Expressions::Max > -max( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, + Containers::Expressions::Max > +max( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) { - return Expressions::StaticBinaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >, - Expressions::Max >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + typename Containers::Expressions::StaticUnaryExpressionTemplate< 
R1, ROperation >, + Containers::Expressions::Max >( a, b ); } //// @@ -751,10 +782,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator == ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator == ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonEQ( a, b ); + return Containers::Expressions::StaticComparisonEQ( a, b ); } template< typename T1, @@ -762,10 +793,10 @@ template< typename T1, template< typename, typename > class Operation > __cuda_callable__ bool -operator == ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +operator == ( const Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::StaticComparisonEQ( a, b ); + return Containers::Expressions::StaticComparisonEQ( a, b ); } template< typename L1, @@ -775,10 +806,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator == ( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator == ( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonEQ( a, b ); + return Containers::Expressions::StaticComparisonEQ( a, b ); } template< typename L1, @@ -788,10 +819,10 @@ 
template< typename L1, template< typename > class ROperation > __cuda_callable__ bool -operator == ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) +operator == ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) { - return Expressions::StaticComparisonEQ( a, b ); + return Containers::Expressions::StaticComparisonEQ( a, b ); } //// @@ -804,10 +835,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator != ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator != ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonNE( a, b ); + return Containers::Expressions::StaticComparisonNE( a, b ); } template< typename T1, @@ -815,10 +846,10 @@ template< typename T1, template< typename, typename > class Operation > __cuda_callable__ bool -operator != ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +operator != ( const Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::StaticComparisonNE( a, b ); + return Containers::Expressions::StaticComparisonNE( a, b ); } template< typename L1, @@ -828,10 +859,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator != ( const 
Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator != ( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonNE( a, b ); + return Containers::Expressions::StaticComparisonNE( a, b ); } template< typename L1, @@ -841,10 +872,10 @@ template< typename L1, template< typename > class ROperation > __cuda_callable__ bool -operator != ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >& b ) +operator != ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::StaticUnaryExpressionTemplate< R1, ROperation >& b ) { - return Expressions::StaticComparisonNE( a, b ); + return Containers::Expressions::StaticComparisonNE( a, b ); } //// @@ -857,10 +888,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator < ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator < ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonLT( a, b ); + return Containers::Expressions::StaticComparisonLT( a, b ); } template< typename T1, @@ -868,10 +899,10 @@ template< typename T1, template< typename, typename > class Operation > __cuda_callable__ bool -operator < ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation 
>::RealType& b ) +operator < ( const Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::StaticComparisonLT( a, b ); + return Containers::Expressions::StaticComparisonLT( a, b ); } template< typename L1, @@ -881,10 +912,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator < ( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator < ( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonLT( a, b ); + return Containers::Expressions::StaticComparisonLT( a, b ); } template< typename L1, @@ -894,10 +925,10 @@ template< typename L1, template< typename > class ROperation > __cuda_callable__ bool -operator < ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >& b ) +operator < ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::StaticUnaryExpressionTemplate< R1, ROperation >& b ) { - return Expressions::StaticComparisonLT( a, b ); + return Containers::Expressions::StaticComparisonLT( a, b ); } //// @@ -910,10 +941,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator <= ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator <= ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const 
Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonLE( a, b ); + return Containers::Expressions::StaticComparisonLE( a, b ); } template< typename T1, @@ -921,10 +952,10 @@ template< typename T1, template< typename, typename > class Operation > __cuda_callable__ bool -operator <= ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +operator <= ( const Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::StaticComparisonLE( a, b ); + return Containers::Expressions::StaticComparisonLE( a, b ); } template< typename L1, @@ -934,10 +965,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator <= ( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator <= ( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonLE( a, b ); + return Containers::Expressions::StaticComparisonLE( a, b ); } template< typename L1, @@ -947,10 +978,10 @@ template< typename L1, template< typename > class ROperation > __cuda_callable__ bool -operator <= ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >& b ) +operator <= ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::StaticUnaryExpressionTemplate< R1, ROperation >& b ) { - return 
Expressions::StaticComparisonLE( a, b ); + return Containers::Expressions::StaticComparisonLE( a, b ); } //// @@ -963,10 +994,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator > ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator > ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonGT( a, b ); + return Containers::Expressions::StaticComparisonGT( a, b ); } template< typename T1, @@ -974,10 +1005,10 @@ template< typename T1, template< typename, typename > class Operation > __cuda_callable__ bool -operator > ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +operator > ( const Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::StaticComparisonGT( a, b ); + return Containers::Expressions::StaticComparisonGT( a, b ); } template< typename L1, @@ -987,10 +1018,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator > ( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator > ( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonGT( a, b ); + return Containers::Expressions::StaticComparisonGT( a, b ); } template< 
typename L1, @@ -1000,10 +1031,10 @@ template< typename L1, template< typename > class ROperation > __cuda_callable__ bool -operator > ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >& b ) +operator > ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::StaticUnaryExpressionTemplate< R1, ROperation >& b ) { - return Expressions::StaticComparisonGT( a, b ); + return Containers::Expressions::StaticComparisonGT( a, b ); } //// @@ -1016,10 +1047,10 @@ template< typename L1, template< typename, typename > class ROperation > __cuda_callable__ bool -operator >= ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator >= ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonGE( a, b ); + return Containers::Expressions::StaticComparisonGE( a, b ); } template< typename T1, @@ -1027,10 +1058,10 @@ template< typename T1, template< typename, typename > class Operation > __cuda_callable__ bool -operator >= ( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +operator >= ( const Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) { - return Expressions::StaticComparisonGE( a, b ); + return Containers::Expressions::StaticComparisonGE( a, b ); } template< typename L1, @@ -1040,10 +1071,10 @@ template< typename L1, template< typename, typename > class ROperation > 
__cuda_callable__ bool -operator >= ( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, - const typename Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +operator >= ( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) { - return Expressions::StaticComparisonGE( a, b ); + return Containers::Expressions::StaticComparisonGE( a, b ); } template< typename L1, @@ -1053,10 +1084,10 @@ template< typename L1, template< typename > class ROperation > __cuda_callable__ bool -operator >= ( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, - const typename Expressions::StaticUnaryExpressionTemplate< R1, ROperation >& b ) +operator >= ( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::StaticUnaryExpressionTemplate< R1, ROperation >& b ) { - return Expressions::StaticComparisonGE( a, b ); + return Containers::Expressions::StaticComparisonGE( a, b ); } //// @@ -1069,27 +1100,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Minus > -operator -( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Minus > +operator -( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Minus >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + 
Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Minus >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Abs > -operator -( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Abs > +operator -( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Minus >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Minus >( a ); } //// @@ -1098,27 +1129,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Abs > -abs( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Abs > +abs( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Abs >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Abs >( a ); } 
template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Abs > -abs( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Abs > +abs( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Abs >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Abs >( a ); } //// @@ -1127,27 +1158,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Sin > -sin( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Sin > +sin( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Sin >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Sin >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - 
Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Sin > -sin( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Sin > +sin( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Sin >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Sin >( a ); } //// @@ -1156,27 +1187,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Cos > -cos( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Cos > +cos( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Cos >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Cos >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Cos > -cos( const Expressions::StaticUnaryExpressionTemplate< L1, 
LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Cos > +cos( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Cos >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Cos >( a ); } //// @@ -1185,27 +1216,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Tan > -tan( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Tan > +tan( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Tan >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Tan >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Tan > -tan( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, 
LOperation >, + Containers::Expressions::Tan > +tan( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Tan >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Tan >( a ); } //// @@ -1214,27 +1245,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Sqrt > -sqrt( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Sqrt > +sqrt( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Sqrt >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Sqrt >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Sqrt > -sqrt( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Sqrt > +sqrt( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) 
{ - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Sqrt >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Sqrt >( a ); } //// @@ -1243,27 +1274,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Cbrt > -cbrt( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Cbrt > +cbrt( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Cbrt >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Cbrt >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Cbrt > -cbrt( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Cbrt > +cbrt( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - 
Expressions::Cbrt >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Cbrt >( a ); } //// @@ -1273,14 +1304,14 @@ template< typename L1, template< typename, typename > class LOperation, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Pow > -pow( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, const Real& exp ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Pow > +pow( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, const Real& exp ) { - auto e = Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Pow >( a ); + auto e = Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Pow >( a ); e.parameter.set( exp ); return e; } @@ -1289,14 +1320,14 @@ template< typename L1, template< typename > class LOperation, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Pow > -pow( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, const Real& exp ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Pow > +pow( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, const Real& exp ) { - auto e = Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation 
>, - Expressions::Pow >( a ); + auto e = Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Pow >( a ); e.parameter.set( exp ); return e; } @@ -1307,27 +1338,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Sin > -floor( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Sin > +floor( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Floor >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Floor >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Floor > -floor( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Floor > +floor( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Floor >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + 
Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Floor >( a ); } //// @@ -1336,27 +1367,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Ceil > -ceil( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Ceil > +ceil( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Ceil >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Ceil >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Ceil > -sin( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Ceil > +sin( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Ceil >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Ceil >( a ); } //// @@ -1365,27 
+1396,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Asin > -asin( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Asin > +asin( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Asin >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Asin >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Asin > -asin( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Asin > +asin( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Asin >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Asin >( a ); } //// @@ -1394,27 +1425,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const 
Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Acos > -cos( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Acos > +cos( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Acos >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Acos >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Acos > -acos( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Acos > +acos( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Cos >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Cos >( a ); } //// @@ -1423,27 +1454,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Atan > 
-tan( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Atan > +tan( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Atan >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Atan >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Atan > -atan( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Atan > +atan( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Atan >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Atan >( a ); } //// @@ -1452,27 +1483,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Sinh > -sinh( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const 
Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Sinh > +sinh( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Sinh >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Sinh >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Sinh > -sinh( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Sinh > +sinh( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Sinh >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Sinh >( a ); } //// @@ -1481,27 +1512,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Cosh > -cosh( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation 
>, + Containers::Expressions::Cosh > +cosh( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Cosh >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Cosh >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Cosh > -cosh( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Cosh > +cosh( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Cosh >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Cosh >( a ); } //// @@ -1510,27 +1541,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Tanh > -cosh( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Tanh > +cosh( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { 
- return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Tanh >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Tanh >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Tanh > -tanh( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Tanh > +tanh( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Tanh >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Tanh >( a ); } //// @@ -1539,27 +1570,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Log > -log( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Log > +log( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - 
Expressions::Log >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Log >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Log > -log( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Log > +log( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Log >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Log >( a ); } //// @@ -1568,27 +1599,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Log10 > -log10( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Log10 > +log10( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Log10 >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + 
Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Log10 >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Log10 > -log10( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Log10 > +log10( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Log10 >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Log10 >( a ); } //// @@ -1597,27 +1628,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Log2 > -log2( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Log2 > +log2( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Log2 >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Log2 >( a ); } 
template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Log2 > -log2( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Log2 > +log2( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Log2 >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Log2 >( a ); } //// @@ -1626,27 +1657,27 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Exp > -exp( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Exp > +exp( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, - Expressions::Exp >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >, + Containers::Expressions::Exp >( a ); } template< typename L1, template< typename > class LOperation > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< - 
Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Exp > -exp( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Exp > +exp( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a ) { - return Expressions::StaticUnaryExpressionTemplate< - Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, - Expressions::Exp >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< + Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >, + Containers::Expressions::Exp >( a ); } //// @@ -1655,8 +1686,8 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -typename Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType -min( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +typename Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType +min( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { return StaticExpressionMin( a ); } @@ -1665,8 +1696,8 @@ template< typename L1, template< typename > class LOperation, typename Parameter > __cuda_callable__ -typename Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType -min( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +typename Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +min( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a ) { return StaticExpressionMin( a ); } @@ -1675,8 +1706,8 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -typename 
Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType -max( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +typename Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType +max( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { return StaticExpressionMax( a ); } @@ -1685,8 +1716,8 @@ template< typename L1, template< typename > class LOperation, typename Parameter > __cuda_callable__ -typename Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType -max( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +typename Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +max( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a ) { return StaticExpressionMax( a ); } @@ -1695,8 +1726,8 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -typename Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType -sum( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +typename Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType +sum( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { return StaticExpressionSum( a ); } @@ -1705,18 +1736,40 @@ template< typename L1, template< typename > class LOperation, typename Parameter > __cuda_callable__ -typename Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType -sum( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +typename Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +sum( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a ) { return 
StaticExpressionSum( a ); } +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename Real > +__cuda_callable__ +typename Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType +lpNorm( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, const Real& p ) +{ + return StaticExpressionLpNorm( a, p ); +} + +template< typename L1, + template< typename > class LOperation, + typename Parameter, + typename Real > +__cuda_callable__ +typename Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +lpNorm( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a, const Real& p ) +{ + return StaticExpressionLpNorm( a, p ); +} + template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -typename Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType -product( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +typename Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType +product( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { return StaticExpressionProduct( a ); } @@ -1725,8 +1778,8 @@ template< typename L1, template< typename > class LOperation, typename Parameter > __cuda_callable__ -typename Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType -product( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +typename Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +product( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a ) { return StaticExpressionProduct( a ); } @@ -1735,8 +1788,8 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > 
__cuda_callable__ -typename Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType -logicalOr( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +typename Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType +logicalOr( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { return StaticExpressionLogicalOr( a ); } @@ -1745,8 +1798,8 @@ template< typename L1, template< typename > class LOperation, typename Parameter > __cuda_callable__ -typename Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType -logicalOr( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +typename Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +logicalOr( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a ) { return StaticExpressionLogicalOr( a ); } @@ -1755,8 +1808,8 @@ template< typename L1, typename L2, template< typename, typename > class LOperation > __cuda_callable__ -typename Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType -binaryOr( const Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) +typename Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >::RealType +binaryOr( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a ) { return StaticExpressionBinaryOr( a ); } @@ -1765,37 +1818,10 @@ template< typename L1, template< typename > class LOperation, typename Parameter > __cuda_callable__ -typename Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType -binaryOr( const Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a ) +typename Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >::RealType +binaryOr( const 
Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation, Parameter >& a ) { return StaticExpressionBinaryOr( a ); } -//// -// Output stream -template< typename T1, - typename T2, - template< typename, typename > class Operation > -std::ostream& operator << ( std::ostream& str, const StaticBinaryExpressionTemplate< T1, T2, Operation >& expression ) -{ - str << "[ "; - for( int i = 0; i < expression.getSize() - 1; i++ ) - str << expression[ i ] << ", "; - str << expression[ expression.getSize() - 1 ] << " ]"; - return str; -} - -template< typename T, - template< typename > class Operation, - typename Parameter > -std::ostream& operator << ( std::ostream& str, const StaticUnaryExpressionTemplate< T, Operation, Parameter >& expression ) -{ - str << "[ "; - for( int i = 0; i < expression.getSize() - 1; i++ ) - str << expression[ i ] << ", "; - str << expression[ expression.getSize() - 1 ] << " ]"; - return str; -} - } //namespace Expressions - } //namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Expressions/VerticalOperations.h b/src/TNL/Containers/Expressions/VerticalOperations.h index 250659d1a..e7549e90a 100644 --- a/src/TNL/Containers/Expressions/VerticalOperations.h +++ b/src/TNL/Containers/Expressions/VerticalOperations.h @@ -21,190 +21,234 @@ namespace TNL { namespace Containers { namespace Expressions { -template< typename T > +template< typename Expression > __cuda_callable__ -auto StaticExpressionMin( const T& a ) -> decltype( a[ 0 ] ) +auto StaticExpressionMin( const Expression& expression ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type { - auto aux = a[ 0 ]; - for( int i = 1; i < a.getSize(); i++ ) - aux = TNL::min( aux, a[ i ] ); + auto aux = expression[ 0 ]; + for( int i = 1; i < expression.getSize(); i++ ) + aux = TNL::min( aux, expression[ i ] ); return aux; } -template< typename T > +template< typename Expression > __cuda_callable__ -auto StaticExpressionMax( const T& a ) -> decltype( a[ 0 ] 
) +auto StaticExpressionMax( const Expression& expression ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type { - auto aux = a[ 0 ]; - for( int i = 1; i < a.getSize(); i++ ) - aux = TNL::max( aux, a[ i ] ); + auto aux = expression[ 0 ]; + for( int i = 1; i < expression.getSize(); i++ ) + aux = TNL::max( aux, expression[ i ] ); return aux; } -template< typename T > +template< typename Expression > __cuda_callable__ -auto StaticExpressionSum( const T& a ) -> decltype( a[ 0 ] ) +auto StaticExpressionSum( const Expression& expression ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type { - auto aux = a[ 0 ]; - for( int i = 1; i < a.getSize(); i++ ) - aux += a[ i ]; + auto aux = expression[ 0 ]; + for( int i = 1; i < expression.getSize(); i++ ) + aux += expression[ i ]; return aux; } -template< typename T > +template< typename Expression, typename Real > __cuda_callable__ -auto StaticExpressionProduct( const T& a ) -> decltype( a[ 0 ] ) +auto StaticExpressionLpNorm( const Expression& expression, const Real& p ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type { - auto aux = a[ 0 ]; - for( int i = 1; i < a.getSize(); i++ ) - aux *= a[ i ]; + if( p == ( Real ) 1.0 ) + { + auto aux = TNL::abs( expression[ 0 ] ); + for( int i = 1; i < expression.getSize(); i++ ) + aux += TNL::abs( expression[ i ] ); + return aux; + } + if( p == ( Real ) 2.0 ) + { + auto aux = expression[ 0 ] * expression[ 0 ]; + for( int i = 1; i < expression.getSize(); i++ ) + aux += expression[ i ] * expression[ i ]; + return TNL::sqrt( aux ); + } + auto aux = TNL::pow( expression[ 0 ], p ); + for( int i = 1; i < expression.getSize(); i++ ) + aux += TNL::pow( expression[ i ], p ); + return TNL::pow( aux, 1.0 / p ); +} + + +template< typename Expression > +__cuda_callable__ +auto StaticExpressionProduct( const Expression& expression ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type +{ + auto aux = expression[ 0 ]; + for( 
int i = 1; i < expression.getSize(); i++ ) + aux *= expression[ i ]; return aux; } -template< typename T > +template< typename Expression > __cuda_callable__ -bool StaticExpressionLogicalAnd( const T& a ) +bool StaticExpressionLogicalAnd( const Expression& expression ) { - auto aux = a[ 0 ]; - for( int i = 1; i < a.getSize(); i++ ) - aux = aux && a[ i ]; + auto aux = expression[ 0 ]; + for( int i = 1; i < expression.getSize(); i++ ) + aux = aux && expression[ i ]; return aux; } -template< typename T > +template< typename Expression > __cuda_callable__ -bool StaticExpressionLogicalOr( const T& a ) +bool StaticExpressionLogicalOr( const Expression& expression ) { - auto aux = a[ 0 ]; - for( int i = 1; i < a.getSize(); i++ ) - aux = aux || a[ i ]; + auto aux = expression[ 0 ]; + for( int i = 1; i < expression.getSize(); i++ ) + aux = aux || expression[ i ]; return aux; } -template< typename T > +template< typename Expression > __cuda_callable__ -auto StaticExpressionBinaryAnd( const T& a ) -> decltype( a[ 0 ] ) +auto StaticExpressionBinaryAnd( const Expression& expression ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type { - auto aux = a[ 0 ]; - for( int i = 1; i < a.getSize(); i++ ) - aux = aux & a[ i ]; + auto aux = expression[ 0 ]; + for( int i = 1; i < expression.getSize(); i++ ) + aux = aux & expression[ i ]; return aux; } -template< typename T > +template< typename Expression > __cuda_callable__ -auto StaticExpressionBinaryOr( const T& a ) -> decltype( a[ 0 ] ) +auto StaticExpressionBinaryOr( const Expression& expression ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type { - auto aux = a[ 0 ]; - for( int i = 1; i < a.getSize(); i++ ) - aux = aux | a[ i ]; + auto aux = expression[ 0 ]; + for( int i = 1; i < expression.getSize(); i++ ) + aux = aux | expression[ i ]; return aux; } //// // Non-static operations template< typename Expression > -__cuda_callable__ -auto ExpressionMin( const Expression& a ) -> decltype( a[ 0 
] ) +auto ExpressionMin( const Expression& expression ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type { - using ResultType = decltype( a[ 0 ] ); + using ResultType = typename std::remove_cv< typename std::remove_reference< decltype( expression[ 0 ] ) >::type >::type; using IndexType = typename Expression::IndexType; - auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return expression[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = a < b ? a : b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = a < b ? a : b; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( expression.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() ); } template< typename Expression > -__cuda_callable__ -auto ExpressionMax( const Expression& a ) -> decltype( a[ 0 ] ) +auto ExpressionMax( const Expression& expression ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type { - using ResultType = decltype( a[ 0 ] ); + using ResultType = typename std::remove_cv< typename std::remove_reference< decltype( expression[ 0 ] ) >::type >::type; using IndexType = typename Expression::IndexType; - auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return expression[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = a > b ? a : b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = a > b ? 
a : b; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( expression.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() ); /* lowest(), not min(): for floating-point types min() is the smallest positive value */ } template< typename Expression > -__cuda_callable__ -auto ExpressionSum( const Expression& a ) -> decltype( a[ 0 ] ) +auto ExpressionSum( const Expression& expression ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type +{ + using ResultType = typename std::remove_cv< typename std::remove_reference< decltype( expression[ 0 ] ) >::type >::type; + using IndexType = typename Expression::IndexType; + + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return expression[ i ]; }; + auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; + auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( expression.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0.0 ); +} + +template< typename Expression, typename Real > +auto ExpressionLpNorm( const Expression& expression, const Real& p ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type { - using ResultType = decltype( a[ 0 ] ); + using ResultType = typename std::remove_cv< typename std::remove_reference< decltype( expression[ 0 ] ) >::type >::type; using IndexType = typename Expression::IndexType; - auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; + if( p == ( Real ) 1.0 ) + { + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( expression[ i ] ); }; + auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; + auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, 
volatile ResultType& b ) { a += b; }; + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( expression.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0.0 ); + } + if( p == ( Real ) 2.0 ) + { + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return expression[ i ] * expression[ i ]; }; + auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; + auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; + return TNL::sqrt( Algorithms::Reduction< typename Expression::DeviceType >::reduce( expression.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0.0 ) ); + } + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( expression[ i ] ), p ); }; /* general p: the Lp norm sums |x_i|^p, so take abs before pow */ auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0.0 ); + return TNL::pow( Algorithms::Reduction< typename Expression::DeviceType >::reduce( expression.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0.0 ), ( Real ) 1.0 / p ); } + template< typename Expression > -__cuda_callable__ -auto ExpressionProduct( const Expression& a ) -> decltype( a[ 0 ] ) +auto ExpressionProduct( const Expression& expression ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type { - using ResultType = decltype( a[ 0 ] ); + using ResultType = typename std::remove_cv< typename std::remove_reference< decltype( expression[ 0 ] ) >::type >::type; using IndexType = typename Expression::IndexType; - auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return expression[ i ]; }; auto reduction = [=] __cuda_callable__ 
( ResultType& a, const ResultType& b ) { a *= b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a *= b; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 1.0 ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( expression.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 1.0 ); } template< typename Expression > -__cuda_callable__ -bool ExpressionLogicalAnd( const Expression& a ) +bool ExpressionLogicalAnd( const Expression& expression ) { - using ResultType = decltype( a[ 0 ] ); + using ResultType = bool; using IndexType = typename Expression::IndexType; - auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return expression[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = a && b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = a && b; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, true ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( expression.getSize(), reduction, volatileReduction, fetch, true ); } template< typename Expression > -__cuda_callable__ -bool ExpressionLogicalOr( const Expression& a ) +bool ExpressionLogicalOr( const Expression& expression ) { - using ResultType = decltype( a[ 0 ] ); + using ResultType = bool; using IndexType = typename Expression::IndexType; - auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return expression[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = a || b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile 
ResultType& b ) { a = a || b; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( expression.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ); } template< typename Expression > -__cuda_callable__ -auto ExpressionBinaryAnd( const Expression& a ) -> decltype( a[ 0 ] ) +auto ExpressionBinaryAnd( const Expression& expression ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type { - using ResultType = decltype( a[ 0 ] ); + using ResultType = typename std::remove_cv< typename std::remove_reference< decltype( expression[ 0 ] ) >::type >::type; using IndexType = typename Expression::IndexType; - auto fetch = [=] __cuda_callable__ ( IndexType i ) { return a[ i ]; }; + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return expression[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = a & b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = a & b; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( expression.getSize(), reduction, volatileReduction, fetch, ( ResultType ) ~( ResultType ) 0 ); /* AND identity is all bits set; numeric_limits::max() has the sign bit clear for signed types */ } template< typename Expression > -__cuda_callable__ -auto ExpressionBinaryOr( const Expression& a ) -> decltype( a[ 0 ] ) +auto ExpressionBinaryOr( const Expression& expression ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type { - using ResultType = decltype( a[ 0 ] ); + using ResultType = typename std::remove_cv< typename std::remove_reference< decltype( expression[ 0 ] ) >::type >::type; using IndexType = typename Expression::IndexType; - auto fetch = [=] 
__cuda_callable__ ( IndexType i ) { return a[ i ]; }; + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return expression[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = a | b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = a | b; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( a.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( expression.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ); } } //namespace Expressions diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h index 9f785de45..83b2883d9 100644 --- a/src/TNL/Containers/StaticVector.h +++ b/src/TNL/Containers/StaticVector.h @@ -17,8 +17,6 @@ namespace TNL { namespace Containers { - - /** * \brief Vector with constant size. * diff --git a/src/TNL/Containers/StaticVectorExpressions.h b/src/TNL/Containers/StaticVectorExpressions.h index cf4ad298a..311cc07d2 100644 --- a/src/TNL/Containers/StaticVectorExpressions.h +++ b/src/TNL/Containers/StaticVectorExpressions.h @@ -1,5 +1,5 @@ /*************************************************************************** - StaticVectorExpressions.h - description + Containers::StaticVectorExpressions.h - description ------------------- begin : Apr 19, 2019 copyright : (C) 2019 by Tomas Oberhuber @@ -15,532 +15,607 @@ #include namespace TNL { - namespace Containers { + +//// +// All operations are supposed to be in namespace TNL +// namespace Containers { //// // Addition template< int Size, typename Real, typename ET > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Addition > -operator+( const StaticVector< Size, Real >& a, const ET& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real 
>, ET, Containers::Expressions::Addition > +operator+( const Containers::StaticVector< Size, Real >& a, const ET& b ) { - return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Addition >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real >, ET, Containers::Expressions::Addition >( a, b ); } template< typename ET, int Size, typename Real > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Addition > -operator+( const ET& a, const StaticVector< Size, Real >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< ET, Containers::StaticVector< Size, Real >, Containers::Expressions::Addition > +operator+( const ET& a, const Containers::StaticVector< Size, Real >& b ) { - return Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Addition >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< ET, Containers::StaticVector< Size, Real >, Containers::Expressions::Addition >( a, b ); } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Addition > -operator+( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real1 >, Containers::StaticVector< Size, Real2 >, Containers::Expressions::Addition > +operator+( const Containers::StaticVector< Size, Real1 >& a, const Containers::StaticVector< Size, Real2 >& b ) { - return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Addition >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real1 >, Containers::StaticVector< 
Size, Real2 >, Containers::Expressions::Addition >( a, b ); } //// // Subtraction template< int Size, typename Real, typename ET > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Subtraction > -operator-( const StaticVector< Size, Real >& a, const ET& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real >, ET, Containers::Expressions::Subtraction > +operator-( const Containers::StaticVector< Size, Real >& a, const ET& b ) { - return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Subtraction >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real >, ET, Containers::Expressions::Subtraction >( a, b ); } template< typename ET, int Size, typename Real > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Subtraction > -operator-( const ET& a, const StaticVector< Size, Real >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< ET, Containers::StaticVector< Size, Real >, Containers::Expressions::Subtraction > +operator-( const ET& a, const Containers::StaticVector< Size, Real >& b ) { - return Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Subtraction >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< ET, Containers::StaticVector< Size, Real >, Containers::Expressions::Subtraction >( a, b ); } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Subtraction > -operator-( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real1 >, 
Containers::StaticVector< Size, Real2 >, Containers::Expressions::Subtraction > +operator-( const Containers::StaticVector< Size, Real1 >& a, const Containers::StaticVector< Size, Real2 >& b ) { - return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Subtraction >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real1 >, Containers::StaticVector< Size, Real2 >, Containers::Expressions::Subtraction >( a, b ); } //// // Multiplication template< int Size, typename Real, typename ET > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Multiplication > -operator*( const StaticVector< Size, Real >& a, const ET& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real >, ET, Containers::Expressions::Multiplication > +operator*( const Containers::StaticVector< Size, Real >& a, const ET& b ) { - return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Multiplication >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real >, ET, Containers::Expressions::Multiplication >( a, b ); } template< typename ET, int Size, typename Real > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Multiplication > -operator*( const ET& a, const StaticVector< Size, Real >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< ET, Containers::StaticVector< Size, Real >, Containers::Expressions::Multiplication > +operator*( const ET& a, const Containers::StaticVector< Size, Real >& b ) { - return Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Multiplication >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< ET, 
Containers::StaticVector< Size, Real >, Containers::Expressions::Multiplication >( a, b ); } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Multiplication > -operator*( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real1 >, Containers::StaticVector< Size, Real2 >, Containers::Expressions::Multiplication > +operator*( const Containers::StaticVector< Size, Real1 >& a, const Containers::StaticVector< Size, Real2 >& b ) { - return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Multiplication >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real1 >, Containers::StaticVector< Size, Real2 >, Containers::Expressions::Multiplication >( a, b ); } //// // Division template< int Size, typename Real, typename ET > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Division > -operator/( const StaticVector< Size, Real >& a, const ET& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real >, ET, Containers::Expressions::Division > +operator/( const Containers::StaticVector< Size, Real >& a, const ET& b ) { - return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Division >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real >, ET, Containers::Expressions::Division >( a, b ); } template< typename ET, int Size, typename Real > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Division > -operator/( const ET& a, 
const StaticVector< Size, Real >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< ET, Containers::StaticVector< Size, Real >, Containers::Expressions::Division > +operator/( const ET& a, const Containers::StaticVector< Size, Real >& b ) { - return Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Division >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< ET, Containers::StaticVector< Size, Real >, Containers::Expressions::Division >( a, b ); } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Division > -operator/( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real1 >, Containers::StaticVector< Size, Real2 >, Containers::Expressions::Division > +operator/( const Containers::StaticVector< Size, Real1 >& a, const Containers::StaticVector< Size, Real2 >& b ) { - return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Division >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real1 >, Containers::StaticVector< Size, Real2 >, Containers::Expressions::Division >( a, b ); } //// // Min template< int Size, typename Real, typename ET > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Min > -min( const StaticVector< Size, Real >& a, const ET& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real >, ET, Containers::Expressions::Min > +min( const Containers::StaticVector< Size, Real >& a, const ET& b ) { - return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, 
ET, Expressions::Min >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real >, ET, Containers::Expressions::Min >( a, b ); } template< typename ET, int Size, typename Real > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Min > -min( const ET& a, const StaticVector< Size, Real >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< ET, Containers::StaticVector< Size, Real >, Containers::Expressions::Min > +min( const ET& a, const Containers::StaticVector< Size, Real >& b ) { - return Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Min >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< ET, Containers::StaticVector< Size, Real >, Containers::Expressions::Min >( a, b ); } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Min > -min( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real1 >, Containers::StaticVector< Size, Real2 >, Containers::Expressions::Min > +min( const Containers::StaticVector< Size, Real1 >& a, const Containers::StaticVector< Size, Real2 >& b ) { - return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Min >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real1 >, Containers::StaticVector< Size, Real2 >, Containers::Expressions::Min >( a, b ); } //// // Max template< int Size, typename Real, typename ET > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Max > -max( const StaticVector< Size, 
Real >& a, const ET& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real >, ET, Containers::Expressions::Max > +max( const Containers::StaticVector< Size, Real >& a, const ET& b ) { - return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Max >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real >, ET, Containers::Expressions::Max >( a, b ); } template< typename ET, int Size, typename Real > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Max > -max( const ET& a, const StaticVector< Size, Real >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< ET, Containers::StaticVector< Size, Real >, Containers::Expressions::Max > +max( const ET& a, const Containers::StaticVector< Size, Real >& b ) { - return Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Max >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< ET, Containers::StaticVector< Size, Real >, Containers::Expressions::Max >( a, b ); } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -const Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Max > -max( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +const Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real1 >, Containers::StaticVector< Size, Real2 >, Containers::Expressions::Max > +max( const Containers::StaticVector< Size, Real1 >& a, const Containers::StaticVector< Size, Real2 >& b ) { - return Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Max >( a, b ); + return Containers::Expressions::StaticBinaryExpressionTemplate< 
Containers::StaticVector< Size, Real1 >, Containers::StaticVector< Size, Real2 >, Containers::Expressions::Max >( a, b ); } //// // Comparison operations - operator == template< int Size, typename Real, typename ET > __cuda_callable__ -bool operator==( const StaticVector< Size, Real >& a, const ET& b ) +bool operator==( const Containers::StaticVector< Size, Real >& a, const ET& b ) { - return Expressions::StaticComparisonEQ( a, b ); + return Containers::Expressions::StaticComparisonEQ( a, b ); } template< typename ET, int Size, typename Real > __cuda_callable__ -bool operator==( const ET& a, const StaticVector< Size, Real >& b ) +bool operator==( const ET& a, const Containers::StaticVector< Size, Real >& b ) { - return Expressions::StaticComparisonEQ( a, b ); + return Containers::Expressions::StaticComparisonEQ( a, b ); } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -bool operator==( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +bool operator==( const Containers::StaticVector< Size, Real1 >& a, const Containers::StaticVector< Size, Real2 >& b ) { - return Expressions::StaticComparisonEQ( a, b ); + return Containers::Expressions::StaticComparisonEQ( a, b ); } //// // Comparison operations - operator != template< int Size, typename Real, typename ET > __cuda_callable__ -bool operator!=( const StaticVector< Size, Real >& a, const ET& b ) +bool operator!=( const Containers::StaticVector< Size, Real >& a, const ET& b ) { - return Expressions::StaticComparisonNE( a, b ); + return Containers::Expressions::StaticComparisonNE( a, b ); } template< typename ET, int Size, typename Real > __cuda_callable__ -bool operator!=( const ET& a, const StaticVector< Size, Real >& b ) +bool operator!=( const ET& a, const Containers::StaticVector< Size, Real >& b ) { - return Expressions::StaticComparisonNE( a, b ); + return Containers::Expressions::StaticComparisonNE( a, b ); } template< int Size, typename Real1, typename Real2 > 
__cuda_callable__ -bool operator!=( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +bool operator!=( const Containers::StaticVector< Size, Real1 >& a, const Containers::StaticVector< Size, Real2 >& b ) { - return Expressions::StaticComparisonNE( a, b ); + return Containers::Expressions::StaticComparisonNE( a, b ); } //// // Comparison operations - operator < template< int Size, typename Real, typename ET > __cuda_callable__ -bool operator<( const StaticVector< Size, Real >& a, const ET& b ) +bool operator<( const Containers::StaticVector< Size, Real >& a, const ET& b ) { - return Expressions::StaticComparisonLT( a, b ); + return Containers::Expressions::StaticComparisonLT( a, b ); } template< typename ET, int Size, typename Real > __cuda_callable__ -bool operator<( const ET& a, const StaticVector< Size, Real >& b ) +bool operator<( const ET& a, const Containers::StaticVector< Size, Real >& b ) { - return Expressions::StaticComparisonLT( a, b ); + return Containers::Expressions::StaticComparisonLT( a, b ); } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -bool operator<( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +bool operator<( const Containers::StaticVector< Size, Real1 >& a, const Containers::StaticVector< Size, Real2 >& b ) { - return Expressions::StaticComparisonLT( a, b ); + return Containers::Expressions::StaticComparisonLT( a, b ); } //// // Comparison operations - operator <= template< int Size, typename Real, typename ET > __cuda_callable__ -bool operator<=( const StaticVector< Size, Real >& a, const ET& b ) +bool operator<=( const Containers::StaticVector< Size, Real >& a, const ET& b ) { - return Expressions::StaticComparisonLE( a, b ); + return Containers::Expressions::StaticComparisonLE( a, b ); } template< typename ET, int Size, typename Real > __cuda_callable__ -bool operator<=( const ET& a, const StaticVector< Size, Real >& b ) +bool operator<=( const ET& a, 
const Containers::StaticVector< Size, Real >& b ) { - return Expressions::StaticComparisonLE( a, b ); + return Containers::Expressions::StaticComparisonLE( a, b ); } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -bool operator<=( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +bool operator<=( const Containers::StaticVector< Size, Real1 >& a, const Containers::StaticVector< Size, Real2 >& b ) { - return Expressions::StaticComparisonLE( a, b ); + return Containers::Expressions::StaticComparisonLE( a, b ); } //// // Comparison operations - operator > template< int Size, typename Real, typename ET > __cuda_callable__ -bool operator>( const StaticVector< Size, Real >& a, const ET& b ) +bool operator>( const Containers::StaticVector< Size, Real >& a, const ET& b ) { - return Expressions::StaticComparisonGT( a, b ); + return Containers::Expressions::StaticComparisonGT( a, b ); } template< typename ET, int Size, typename Real > __cuda_callable__ -bool operator>( const ET& a, const StaticVector< Size, Real >& b ) +bool operator>( const ET& a, const Containers::StaticVector< Size, Real >& b ) { - return Expressions::StaticComparisonGT( a, b ); + return Containers::Expressions::StaticComparisonGT( a, b ); } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -bool operator>( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +bool operator>( const Containers::StaticVector< Size, Real1 >& a, const Containers::StaticVector< Size, Real2 >& b ) { - return Expressions::StaticComparisonGT( a, b ); + return Containers::Expressions::StaticComparisonGT( a, b ); } //// // Comparison operations - operator >= template< int Size, typename Real, typename ET > __cuda_callable__ -bool operator>=( const StaticVector< Size, Real >& a, const ET& b ) +bool operator>=( const Containers::StaticVector< Size, Real >& a, const ET& b ) { - return Expressions::StaticComparisonGE( a, b ); + return 
Containers::Expressions::StaticComparisonGE( a, b ); } template< typename ET, int Size, typename Real > __cuda_callable__ -bool operator>=( const ET& a, const StaticVector< Size, Real >& b ) +bool operator>=( const ET& a, const Containers::StaticVector< Size, Real >& b ) { - return Expressions::StaticComparisonGE( a, b ); + return Containers::Expressions::StaticComparisonGE( a, b ); } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -bool operator>=( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +bool operator>=( const Containers::StaticVector< Size, Real1 >& a, const Containers::StaticVector< Size, Real2 >& b ) { - return Expressions::StaticComparisonGE( a, b ); + return Containers::Expressions::StaticComparisonGE( a, b ); } //// // Minus template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Minus > -operator-( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Minus > +operator-( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Minus >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Minus >( a ); } //// // Abs template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Abs > -abs( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Abs > +abs( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Abs >( a ); + return 
Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Abs >( a ); } //// // Sine template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sin > -sin( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Sin > +sin( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sin >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Sin >( a ); } //// // Cosine template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cos > -cos( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Cos > +cos( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cos >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Cos >( a ); } //// // Tangent template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Tan > -tan( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Tan > +tan( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Tan >( a ); + return 
Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Tan >( a ); } //// // Sqrt template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sqrt > -sqrt( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Sqrt > +sqrt( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sqrt >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Sqrt >( a ); } //// // Cbrt template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cbrt > -cbrt( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Cbrt > +cbrt( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cbrt >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Cbrt >( a ); } //// // Power template< int Size, typename Real, typename ExpType > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Pow, ExpType > -pow( const StaticVector< Size, Real >& a, const ExpType& exp ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Pow, ExpType > +pow( const Containers::StaticVector< Size, Real >& a, const ExpType& exp ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, 
Real >, Expressions::Pow, ExpType >( a, exp ); + return Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Pow, ExpType >( a, exp ); } //// // Floor template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Floor > -floor( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Floor > +floor( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Floor >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Floor >( a ); } //// // Ceil template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Ceil > -ceil( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Ceil > +ceil( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Ceil >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Ceil >( a ); } //// // Acos template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Acos > -acos( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Acos > +acos( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< 
Size, Real >, Expressions::Acos >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Acos >( a ); } //// // Asin template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Asin > -asin( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Asin > +asin( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Asin >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Asin >( a ); } //// // Atan template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Atan > -atan( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Atan > +atan( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Atan >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Atan >( a ); } //// // Cosh template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Cosh > -cosh( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Cosh > +cosh( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, 
Expressions::Cosh >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Cosh >( a ); } //// // Tanh template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Tanh > -tanh( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Tanh > +tanh( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Tanh >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Tanh >( a ); } //// // Log template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log > -log( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Log > +log( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Log >( a ); } //// // Log10 template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log10 > -log10( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Log10 > +log10( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log10 >( a ); + 
return Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Log10 >( a ); } //// // Log2 template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log2 > -log2( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Log2 > +log2( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Log2 >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Log2 >( a ); } //// // Exp template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Exp > -exp( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Exp > +exp( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Exp >( a ); + return Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Exp >( a ); } //// // Sign template< int Size, typename Real > __cuda_callable__ -const Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sign > -sign( const StaticVector< Size, Real >& a ) +const Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Sign > +sign( const Containers::StaticVector< Size, Real >& a ) +{ + return Containers::Expressions::StaticUnaryExpressionTemplate< Containers::StaticVector< Size, Real >, Containers::Expressions::Sign >( a ); 
+} + +//// +// Vertical operations - min +template< int Size, typename Real > +__cuda_callable__ +typename Containers::StaticVector< Size, Real >::RealType +min( const Containers::StaticVector< Size, Real >& a ) +{ + return Containers::Expressions::StaticExpressionMin( a ); +} + +template< int Size, typename Real > +__cuda_callable__ +typename Containers::StaticVector< Size, Real >::RealType +max( const Containers::StaticVector< Size, Real >& a ) +{ + return Containers::Expressions::StaticExpressionMax( a ); +} + +template< int Size, typename Real > +__cuda_callable__ +typename Containers::StaticVector< Size, Real >::RealType +sum( const Containers::StaticVector< Size, Real >& a ) +{ + return Containers::Expressions::StaticExpressionSum( a ); +} + +template< int Size, typename Real, typename Real2 > +__cuda_callable__ +typename Containers::StaticVector< Size, Real >::RealType +lpNorm( const Containers::StaticVector< Size, Real >& a, const Real2& p ) +{ + return Containers::Expressions::StaticExpressionLpNorm( a, p ); +} + +template< int Size, typename Real > +__cuda_callable__ +typename Containers::StaticVector< Size, Real >::RealType +product( const Containers::StaticVector< Size, Real >& a ) +{ + return Containers::Expressions::StaticExpressionProduct( a ); +} + +template< int Size, typename Real > +__cuda_callable__ +bool +logicalOr( const Containers::StaticVector< Size, Real >& a ) { - return Expressions::StaticUnaryExpressionTemplate< StaticVector< Size, Real >, Expressions::Sign >( a ); + return Containers::Expressions::StaticExpressionLogicalOr( a ); } +template< int Size, typename Real > +__cuda_callable__ +typename Containers::StaticVector< Size, Real >::RealType +binaryOr( const Containers::StaticVector< Size, Real >& a ) +{ + return Containers::Expressions::StaticExpressionBinaryOr( a ); +} + +template< int Size, typename Real > +__cuda_callable__ +bool +logicalAnd( const Containers::StaticVector< Size, Real >& a ) +{ + return 
Containers::Expressions::StaticExpressionLogicalAnd( a ); +} + +template< int Size, typename Real > +__cuda_callable__ +typename Containers::StaticVector< Size, Real >::RealType +binaryAnd( const Containers::StaticVector< Size, Real >& a ) +{ + return Containers::Expressions::StaticExpressionBinaryAnd( a ); +} //// // TODO: Replace this with multiplication when its safe template< int Size, typename Real, typename ET > __cuda_callable__ -StaticVector< Size, Real > -Scale( const StaticVector< Size, Real >& a, const ET& b ) +Containers::StaticVector< Size, Real > +Scale( const Containers::StaticVector< Size, Real >& a, const ET& b ) { - StaticVector< Size, Real > result = Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real >, ET, Expressions::Multiplication >( a, b ); + Containers::StaticVector< Size, Real > result = Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real >, ET, Containers::Expressions::Multiplication >( a, b ); return result; } template< typename ET, int Size, typename Real > __cuda_callable__ -Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Multiplication > -Scale( const ET& a, const StaticVector< Size, Real >& b ) +Containers::Expressions::StaticBinaryExpressionTemplate< ET, Containers::StaticVector< Size, Real >, Containers::Expressions::Multiplication > +Scale( const ET& a, const Containers::StaticVector< Size, Real >& b ) { - StaticVector< Size, Real > result = Expressions::StaticBinaryExpressionTemplate< ET, StaticVector< Size, Real >, Expressions::Multiplication >( a, b ); + Containers::StaticVector< Size, Real > result = Containers::Expressions::StaticBinaryExpressionTemplate< ET, Containers::StaticVector< Size, Real >, Containers::Expressions::Multiplication >( a, b ); return result; } template< int Size, typename Real1, typename Real2 > __cuda_callable__ -Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< 
Size, Real2 >, Expressions::Multiplication > -Scale( const StaticVector< Size, Real1 >& a, const StaticVector< Size, Real2 >& b ) +Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real1 >, Containers::StaticVector< Size, Real2 >, Containers::Expressions::Multiplication > +Scale( const Containers::StaticVector< Size, Real1 >& a, const Containers::StaticVector< Size, Real2 >& b ) { - StaticVector< Size, Real1 > result = Expressions::StaticBinaryExpressionTemplate< StaticVector< Size, Real1 >, StaticVector< Size, Real2 >, Expressions::Multiplication >( a, b ); + Containers::StaticVector< Size, Real1 > result = Containers::Expressions::StaticBinaryExpressionTemplate< Containers::StaticVector< Size, Real1 >, Containers::StaticVector< Size, Real2 >, Containers::Expressions::Multiplication >( a, b ); return result; } - } //namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/VectorViewExpressions.h b/src/TNL/Containers/VectorViewExpressions.h index f807a7ba2..077a6651a 100644 --- a/src/TNL/Containers/VectorViewExpressions.h +++ b/src/TNL/Containers/VectorViewExpressions.h @@ -1,5 +1,5 @@ /*************************************************************************** - VectorViewExpressions.h - description + Containers::VectorViewExpressions.h - description ------------------- begin : Apr 27, 2019 copyright : (C) 2019 by Tomas Oberhuber @@ -13,502 +13,506 @@ #include #include #include +#include namespace TNL { - namespace Containers { + +//// +// All operations are supposed to be in namespace TNL +// namespace Containers { //// // Addition template< typename Real, typename Device, typename Index, typename ET > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Addition > -operator+( const VectorView< Real, Device, Index >& a, const ET& b ) +const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, 
Containers::Expressions::Addition > +operator+( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { - return Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Addition >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Addition >( a, b ); } template< typename ET, typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Addition > -operator+( const ET& a, const VectorView< Real, Device, Index >& b ) +const Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Addition > +operator+( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { - return Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Addition >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Addition >( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Addition > -operator+( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Addition > +operator+( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { - return Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Addition >( a, b ); + return 
Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Addition >( a, b ); } //// // Subtraction template< typename Real, typename Device, typename Index, typename ET > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Subtraction > -operator-( const VectorView< Real, Device, Index >& a, const ET& b ) +const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Subtraction > +operator-( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { - return Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Subtraction >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Subtraction >( a, b ); } template< typename ET, typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Subtraction > -operator-( const ET& a, const VectorView< Real, Device, Index >& b ) +const Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Subtraction > +operator-( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { - return Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Subtraction >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Subtraction >( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index 
>, Expressions::Subtraction > -operator-( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Subtraction > +operator-( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { - return Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Subtraction >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Subtraction >( a, b ); } //// // Multiplication template< typename Real, typename Device, typename Index, typename ET > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Multiplication > -operator*( const VectorView< Real, Device, Index >& a, const ET& b ) +const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Multiplication > +operator*( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { - return Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Multiplication >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Multiplication >( a, b ); } template< typename ET, typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Multiplication > -operator*( const ET& a, const VectorView< Real, Device, Index >& b ) +const Containers::Expressions::BinaryExpressionTemplate< ET, 
Containers::VectorView< Real, Device, Index >, Containers::Expressions::Multiplication > +operator*( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { - return Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Multiplication >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Multiplication >( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Multiplication > -operator*( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Multiplication > +operator*( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { - return Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Multiplication >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Multiplication >( a, b ); } //// // Division template< typename Real, typename Device, typename Index, typename ET > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Division > -operator/( const VectorView< Real, Device, Index >& a, const ET& b ) +const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Division > +operator/( const Containers::VectorView< Real, Device, Index >& a, 
const ET& b ) { - return Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Division >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Division >( a, b ); } template< typename ET, typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Division > -operator/( const ET& a, const VectorView< Real, Device, Index >& b ) +const Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Division > +operator/( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { - return Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Division >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Division >( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Division > -operator/( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Division > +operator/( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { - return Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Division >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, 
Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Division >( a, b ); } //// // Min template< typename Real, typename Device, typename Index, typename ET > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Min > -min( const VectorView< Real, Device, Index >& a, const ET& b ) +const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Min > +min( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { - return Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Min >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Min >( a, b ); } template< typename ET, typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Min > -min( const ET& a, const VectorView< Real, Device, Index >& b ) +const Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Min > +min( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { - return Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Min >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Min >( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Min > -min( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +const Containers::Expressions::BinaryExpressionTemplate< 
Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Min > +min( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { - return Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Min >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Min >( a, b ); } //// // Max template< typename Real, typename Device, typename Index, typename ET > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Max > -max( const VectorView< Real, Device, Index >& a, const ET& b ) +const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Max > +max( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { - return Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Max >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Max >( a, b ); } template< typename ET, typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Max > -max( const ET& a, const VectorView< Real, Device, Index >& b ) +const Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Max > +max( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { - return Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Max >( a, b ); + return 
Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Max >( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ -const Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Max > -max( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Max > +max( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { - return Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Max >( a, b ); + return Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Max >( a, b ); } //// // Comparison operations - operator == template< typename Real, typename Device, typename Index, typename ET > __cuda_callable__ -bool operator==( const VectorView< Real, Device, Index >& a, const ET& b ) +bool operator==( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { - return Expressions::ComparisonEQ( a, b ); + return Containers::Expressions::ComparisonEQ( a, b ); } template< typename ET, typename Real, typename Device, typename Index > __cuda_callable__ -bool operator==( const ET& a, const VectorView< Real, Device, Index >& b ) +bool operator==( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { - return Expressions::ComparisonEQ( a, b ); + return Containers::Expressions::ComparisonEQ( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ -bool operator==( 
const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +bool operator==( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { - return Expressions::ComparisonEQ( a, b ); + return Containers::Expressions::ComparisonEQ( a, b ); } //// // Comparison operations - operator != template< typename Real, typename Device, typename Index, typename ET > __cuda_callable__ -bool operator!=( const VectorView< Real, Device, Index >& a, const ET& b ) +bool operator!=( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { - return Expressions::ComparisonNE( a, b ); + return Containers::Expressions::ComparisonNE( a, b ); } template< typename ET, typename Real, typename Device, typename Index > __cuda_callable__ -bool operator!=( const ET& a, const VectorView< Real, Device, Index >& b ) +bool operator!=( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { - return Expressions::ComparisonNE( a, b ); + return Containers::Expressions::ComparisonNE( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ -bool operator!=( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +bool operator!=( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { - return Expressions::ComparisonNE( a, b ); + return Containers::Expressions::ComparisonNE( a, b ); } //// // Comparison operations - operator < template< typename Real, typename Device, typename Index, typename ET > __cuda_callable__ -bool operator<( const VectorView< Real, Device, Index >& a, const ET& b ) +bool operator<( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { - return Expressions::ComparisonLT( a, b ); + return Containers::Expressions::ComparisonLT( a, b ); } template< typename ET, typename Real, typename Device, typename Index > 
__cuda_callable__ -bool operator<( const ET& a, const VectorView< Real, Device, Index >& b ) +bool operator<( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { - return Expressions::ComparisonLT( a, b ); + return Containers::Expressions::ComparisonLT( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ -bool operator<( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +bool operator<( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { - return Expressions::ComparisonLT( a, b ); + return Containers::Expressions::ComparisonLT( a, b ); } //// // Comparison operations - operator <= template< typename Real, typename Device, typename Index, typename ET > __cuda_callable__ -bool operator<=( const VectorView< Real, Device, Index >& a, const ET& b ) +bool operator<=( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { - return Expressions::ComparisonLE( a, b ); + return Containers::Expressions::ComparisonLE( a, b ); } template< typename ET, typename Real, typename Device, typename Index > __cuda_callable__ -bool operator<=( const ET& a, const VectorView< Real, Device, Index >& b ) +bool operator<=( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { - return Expressions::ComparisonLE( a, b ); + return Containers::Expressions::ComparisonLE( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ -bool operator<=( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +bool operator<=( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { - return Expressions::ComparisonLE( a, b ); + return Containers::Expressions::ComparisonLE( a, b ); } //// // Comparison operations - operator > template< typename Real, typename 
Device, typename Index, typename ET > __cuda_callable__ -bool operator>( const VectorView< Real, Device, Index >& a, const ET& b ) +bool operator>( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { - return Expressions::ComparisonGT( a, b ); + return Containers::Expressions::ComparisonGT( a, b ); } template< typename ET, typename Real, typename Device, typename Index > __cuda_callable__ -bool operator>( const ET& a, const VectorView< Real, Device, Index >& b ) +bool operator>( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { - return Expressions::ComparisonGT( a, b ); + return Containers::Expressions::ComparisonGT( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ -bool operator>( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +bool operator>( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { - return Expressions::ComparisonGT( a, b ); + return Containers::Expressions::ComparisonGT( a, b ); } //// // Comparison operations - operator >= template< typename Real, typename Device, typename Index, typename ET > __cuda_callable__ -bool operator>=( const VectorView< Real, Device, Index >& a, const ET& b ) +bool operator>=( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { - return Expressions::ComparisonGE( a, b ); + return Containers::Expressions::ComparisonGE( a, b ); } template< typename ET, typename Real, typename Device, typename Index > __cuda_callable__ -bool operator>=( const ET& a, const VectorView< Real, Device, Index >& b ) +bool operator>=( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { - return Expressions::ComparisonGE( a, b ); + return Containers::Expressions::ComparisonGE( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ -bool operator>=( const 
VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +bool operator>=( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { - return Expressions::ComparisonGE( a, b ); + return Containers::Expressions::ComparisonGE( a, b ); } //// // Minus template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Minus > -operator-( const VectorView< Real, Device, Index >& a ) +const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Minus > +operator-( const Containers::VectorView< Real, Device, Index >& a ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Minus >( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Minus >( a ); } //// // Abs template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Abs > -abs( const VectorView< Real, Device, Index >& a ) +const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Abs > +abs( const Containers::VectorView< Real, Device, Index >& a ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Abs >( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Abs >( a ); } //// // Sine template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Sin > -sin( const VectorView< Real, Device, Index >& a ) +const 
Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Sin > +sin( const Containers::VectorView< Real, Device, Index >& a ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Sin >( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Sin >( a ); } //// // Cosine template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Cos > -cos( const VectorView< Real, Device, Index >& a ) +const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Cos > +cos( const Containers::VectorView< Real, Device, Index >& a ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Cos >( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Cos >( a ); } //// // Tangent template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Tan > -tan( const VectorView< Real, Device, Index >& a ) +const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Tan > +tan( const Containers::VectorView< Real, Device, Index >& a ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Tan >( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Tan >( a ); } //// // Sqrt template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, 
Index >, Expressions::Sqrt > -sqrt( const VectorView< Real, Device, Index >& a ) +const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Sqrt > +sqrt( const Containers::VectorView< Real, Device, Index >& a ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Sqrt >( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Sqrt >( a ); } //// // Cbrt template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Cbrt > -cbrt( const VectorView< Real, Device, Index >& a ) +const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Cbrt > +cbrt( const Containers::VectorView< Real, Device, Index >& a ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Cbrt >( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Cbrt >( a ); } //// // Power template< typename Real, typename Device, typename Index, typename ExpType > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Pow, ExpType > -pow( const VectorView< Real, Device, Index >& a, const ExpType& exp ) +const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Pow, ExpType > +pow( const Containers::VectorView< Real, Device, Index >& a, const ExpType& exp ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Pow, ExpType >( a, exp ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, 
Containers::Expressions::Pow, ExpType >( a, exp ); } //// // Floor template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Floor > -floor( const VectorView< Real, Device, Index >& a ) +const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Floor > +floor( const Containers::VectorView< Real, Device, Index >& a ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Floor >( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Floor >( a ); } //// // Ceil template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Ceil > -ceil( const VectorView< Real, Device, Index >& a ) +const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Ceil > +ceil( const Containers::VectorView< Real, Device, Index >& a ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Ceil >( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Ceil >( a ); } //// // Acos template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Acos > -acos( const VectorView< Real, Device, Index >& a ) +const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Acos > +acos( const Containers::VectorView< Real, Device, Index >& a ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Acos 
>( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Acos >( a ); } //// // Asin template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Asin > -asin( const VectorView< Real, Device, Index >& a ) +const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Asin > +asin( const Containers::VectorView< Real, Device, Index >& a ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Asin >( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Asin >( a ); } //// // Atan template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Atan > -atan( const VectorView< Real, Device, Index >& a ) +const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Atan > +atan( const Containers::VectorView< Real, Device, Index >& a ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Atan >( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Atan >( a ); } //// // Cosh template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Cosh > -cosh( const VectorView< Real, Device, Index >& a ) +const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Cosh > +cosh( const Containers::VectorView< Real, Device, Index >& a ) { - 
return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Cosh >( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Cosh >( a ); } //// // Tanh template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Tanh > -tanh( const VectorView< Real, Device, Index >& a ) +const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Tanh > +tanh( const Containers::VectorView< Real, Device, Index >& a ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Tanh >( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Tanh >( a ); } //// // Log template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Log > -log( const VectorView< Real, Device, Index >& a ) +const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Log > +log( const Containers::VectorView< Real, Device, Index >& a ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Log >( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Log >( a ); } //// // Log10 template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Log10 > -log10( const VectorView< Real, Device, Index >& a ) +const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, 
Containers::Expressions::Log10 > +log10( const Containers::VectorView< Real, Device, Index >& a ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Log10 >( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Log10 >( a ); } //// // Log2 template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Log2 > -log2( const VectorView< Real, Device, Index >& a ) +const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Log2 > +log2( const Containers::VectorView< Real, Device, Index >& a ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Log2 >( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Log2 >( a ); } //// // Exp template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Exp > -exp( const VectorView< Real, Device, Index >& a ) +const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Exp > +exp( const Containers::VectorView< Real, Device, Index >& a ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Exp >( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Exp >( a ); } //// // Sign template< typename Real, typename Device, typename Index > __cuda_callable__ -const Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Sign > -sign( const VectorView< Real, Device, Index >& a ) +const 
Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Sign > +sign( const Containers::VectorView< Real, Device, Index >& a ) { - return Expressions::UnaryExpressionTemplate< VectorView< Real, Device, Index >, Expressions::Sign >( a ); + return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Sign >( a ); } @@ -516,31 +520,113 @@ sign( const VectorView< Real, Device, Index >& a ) // TODO: Replace this with multiplication when its safe template< typename Real, typename Device, typename Index, typename ET > __cuda_callable__ -VectorView< Real, Device, Index > -Scale( const VectorView< Real, Device, Index >& a, const ET& b ) +Containers::VectorView< Real, Device, Index > +Scale( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { - VectorView< Real, Device, Index > result = Expressions::BinaryExpressionTemplate< VectorView< Real, Device, Index >, ET, Expressions::Multiplication >( a, b ); + Containers::VectorView< Real, Device, Index > result = Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Multiplication >( a, b ); return result; } template< typename ET, typename Real, typename Device, typename Index > __cuda_callable__ -Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Multiplication > -Scale( const ET& a, const VectorView< Real, Device, Index >& b ) +Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Multiplication > +Scale( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { - VectorView< Real, Device, Index > result = Expressions::BinaryExpressionTemplate< ET, VectorView< Real, Device, Index >, Expressions::Multiplication >( a, b ); + Containers::VectorView< Real, Device, Index > result = 
Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Multiplication >( a, b ); return result; } template< typename Real1, typename Real2, typename Device, typename Index > __cuda_callable__ -Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Multiplication > -Scale( const VectorView< Real1, Device, Index >& a, const VectorView< Real2, Device, Index >& b ) +Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Multiplication > +Scale( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { - VectorView< Real1, Device, Index > result = Expressions::BinaryExpressionTemplate< VectorView< Real1, Device, Index >, VectorView< Real2, Device, Index >, Expressions::Multiplication >( a, b ); + Containers::VectorView< Real1, Device, Index > result = Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Multiplication >( a, b ); return result; } +//// +// Vertical operations - min +template< typename Real, + typename Device, + typename Index > +typename Containers::VectorView< Real, Device, Index >::RealType +min( const Containers::VectorView< Real, Device, Index >& a ) +{ + return Containers::Expressions::ExpressionMin( a ); +} + +template< typename Real, + typename Device, + typename Index > +typename Containers::VectorView< Real, Device, Index >::RealType +max( const Containers::VectorView< Real, Device, Index >& a ) +{ + return Containers::Expressions::ExpressionMax( a ); +} + +template< typename Real, + typename Device, + typename Index > +typename Containers::VectorView< Real, Device, Index >::RealType +sum( const 
Containers::VectorView< Real, Device, Index >& a ) +{ + return Containers::Expressions::ExpressionSum( a ); +} + +template< typename Real, + typename Device, + typename Index, + typename Real2 > +typename Containers::VectorView< Real, Device, Index >::RealType +lpNorm( const Containers::VectorView< Real, Device, Index >& a, const Real2& p ) +{ + return Containers::Expressions::ExpressionLpNorm( a, p ); +} + +template< typename Real, + typename Device, + typename Index > +typename Containers::VectorView< Real, Device, Index >::RealType +product( const Containers::VectorView< Real, Device, Index >& a ) +{ + return Containers::Expressions::ExpressionProduct( a ); +} + +template< typename Real, + typename Device, + typename Index > +bool +logicalOr( const Containers::VectorView< Real, Device, Index >& a ) +{ + return Containers::Expressions::ExpressionLogicalOr( a ); +} + +template< typename Real, + typename Device, + typename Index > +typename Containers::VectorView< Real, Device, Index >::RealType +binaryOr( const Containers::VectorView< Real, Device, Index >& a ) +{ + return Containers::Expressions::ExpressionBinaryOr( a ); +} + +template< typename Real, + typename Device, + typename Index > +bool +logicalAnd( const Containers::VectorView< Real, Device, Index >& a ) +{ + return Containers::Expressions::ExpressionLogicalAnd( a ); +} + +template< typename Real, + typename Device, + typename Index > +typename Containers::VectorView< Real, Device, Index >::RealType +binaryAnd( const Containers::VectorView< Real, Device, Index >& a ) +{ + return Containers::Expressions::ExpressionBinaryAnd( a ); +} - } //namespace Containers } // namespace TNL diff --git a/src/TNL/Meshes/DistributedMeshes/DistributedGrid.hpp b/src/TNL/Meshes/DistributedMeshes/DistributedGrid.hpp index 8d96c4e55..bb98ba23e 100644 --- a/src/TNL/Meshes/DistributedMeshes/DistributedGrid.hpp +++ b/src/TNL/Meshes/DistributedMeshes/DistributedGrid.hpp @@ -175,7 +175,7 @@ setOverlaps( const 
SubdomainOverlapsType& lower, this->localOrigin = this->globalGrid.getOrigin() + - Containers::Scale( this->globalGrid.getSpaceSteps(), + Scale( this->globalGrid.getSpaceSteps(), ( this->globalBegin - this->lowerOverlap ) ); this->localBegin = this->lowerOverlap; diff --git a/src/UnitTests/Containers/ArrayViewTest.h b/src/UnitTests/Containers/ArrayViewTest.h index 1d422f20f..6ee9e39f0 100644 --- a/src/UnitTests/Containers/ArrayViewTest.h +++ b/src/UnitTests/Containers/ArrayViewTest.h @@ -468,14 +468,15 @@ TYPED_TEST( ArrayViewTest, assignmentOperator ) v.setValue( 0 ); v = u; - EXPECT_EQ( u, v ); + EXPECT_TRUE( u == v ); EXPECT_EQ( v.getData(), b.getData() ); // assignment from host to device //v.setValue( 0 ); v = 0; v = u_host; - EXPECT_EQ( u, v ); + // TODO: Replace with EXPECT_EQ when nvcc accepts it + EXPECT_TRUE( u == v ); EXPECT_EQ( v.getData(), b.getData() ); // assignment from device to host diff --git a/src/UnitTests/Containers/StaticVectorTest.cpp b/src/UnitTests/Containers/StaticVectorTest.cpp index 19093996f..1437864dc 100644 --- a/src/UnitTests/Containers/StaticVectorTest.cpp +++ b/src/UnitTests/Containers/StaticVectorTest.cpp @@ -191,7 +191,9 @@ TYPED_TEST( StaticVectorTest, abs ) u[ i ] = i; VectorType v = -u; - EXPECT_EQ( abs( v ), u ); + // TODO: replace with EXPECT_EQ when nvcc accepts it + //EXPECT_EQ( abs( v ), u ); + EXPECT_TRUE( abs( v ) == u ); } TYPED_TEST( StaticVectorTest, sin ) @@ -206,7 +208,9 @@ TYPED_TEST( StaticVectorTest, sin ) v[ i ] = sin( u[ i ] ); } - EXPECT_EQ( sin( u ), v ); + // TODO: replace with EXPECT_EQ when nvcc accepts it + //EXPECT_EQ( sin( u ), v ); + EXPECT_TRUE( sin( u ) == v ); } TYPED_TEST( StaticVectorTest, cos ) @@ -221,7 +225,9 @@ TYPED_TEST( StaticVectorTest, cos ) v[ i ] = cos( u[ i ] ); } - EXPECT_EQ( cos( u ), v ); + // TODO: replace with EXPECT_EQ when nvcc accepts it + //EXPECT_EQ( cos( u ), v ); + EXPECT_TRUE( cos( u ) == v ); } TYPED_TEST( StaticVectorTest, tan ) @@ -236,7 +242,9 @@ TYPED_TEST( 
StaticVectorTest, tan ) v[ i ] = tan( u[ i ] ); } - EXPECT_EQ( tan( u ), v ); + // TODO: replace with EXPECT_EQ when nvcc accepts it + //EXPECT_EQ( tan( u ), v ); + EXPECT_TRUE( tan( u ) == v ); } TYPED_TEST( StaticVectorTest, sqrt ) @@ -251,7 +259,9 @@ TYPED_TEST( StaticVectorTest, sqrt ) v[ i ] = sqrt( u[ i ] ); } - EXPECT_EQ( sqrt( u ), v ); + // TODO: replace with EXPECT_EQ when nvcc accepts it + //EXPECT_EQ( sqrt( u ), v ); + EXPECT_TRUE( sqrt( u ) == v ); } TYPED_TEST( StaticVectorTest, cbrt ) @@ -284,8 +294,11 @@ TYPED_TEST( StaticVectorTest, pow ) w[ i ] = pow( u[ i ], 3.0 ); } - EXPECT_EQ( pow( u, 2.0 ), v ); - EXPECT_EQ( pow( u, 3.0 ), w ); + // TODO: replace with EXPECT_EQ when nvcc accepts it + //EXPECT_EQ( pow( u, 2.0 ), v ); + //EXPECT_EQ( pow( u, 3.0 ), w ); + EXPECT_TRUE( pow( u, 2.0 ) == v ); + EXPECT_TRUE( pow( u, 3.0 ) == w ); } TYPED_TEST( StaticVectorTest, floor ) @@ -300,7 +313,9 @@ TYPED_TEST( StaticVectorTest, floor ) v[ i ] = floor( u[ i ] ); } - EXPECT_EQ( floor( u ), v ); + // TODO: replace with EXPECT_EQ when nvcc accepts it + //EXPECT_EQ( floor( u ), v ); + EXPECT_TRUE( floor( u ) == v ); } TYPED_TEST( StaticVectorTest, ceil ) @@ -315,7 +330,9 @@ TYPED_TEST( StaticVectorTest, ceil ) v[ i ] = ceil( u[ i ] ); } - EXPECT_EQ( ceil( u ), v ); + // TODO: replace with EXPECT_EQ when nvcc accepts it + //EXPECT_EQ( ceil( u ), v ); + EXPECT_TRUE( ceil( u ) == v ); } TYPED_TEST( StaticVectorTest, acos ) @@ -330,7 +347,9 @@ TYPED_TEST( StaticVectorTest, acos ) v[ i ] = acos( u[ i ] ); } - EXPECT_EQ( acos( u ), v ); + // TODO: replace with EXPECT_EQ when nvcc accepts it + //EXPECT_EQ( acos( u ), v ); + EXPECT_TRUE( acos( u ) == v ); } TYPED_TEST( StaticVectorTest, asin ) @@ -345,7 +364,9 @@ TYPED_TEST( StaticVectorTest, asin ) v[ i ] = asin( u[ i ] ); } - EXPECT_EQ( asin( u ), v ); + // TODO: replace with EXPECT_EQ when nvcc accepts it + //EXPECT_EQ( asin( u ), v ); + EXPECT_TRUE( asin( u ) == v ); } TYPED_TEST( StaticVectorTest, atan ) @@ -360,7 
+381,9 @@ TYPED_TEST( StaticVectorTest, atan ) v[ i ] = atan( u[ i ] ); } - EXPECT_EQ( atan( u ), v ); + // TODO: replace with EXPECT_EQ when nvcc accepts it + //EXPECT_EQ( atan( u ), v ); + EXPECT_TRUE( atan( u ) == v ); } TYPED_TEST( StaticVectorTest, cosh ) @@ -393,7 +416,9 @@ TYPED_TEST( StaticVectorTest, tanh ) v[ i ] = tanh( u[ i ] ); } - EXPECT_EQ( tanh( u ), v ); + // TODO: replace with EXPECT_EQ when nvcc accepts it + //EXPECT_EQ( tanh( u ), v ); + EXPECT_TRUE( tanh( u ) == v ); } TYPED_TEST( StaticVectorTest, log ) @@ -408,7 +433,9 @@ TYPED_TEST( StaticVectorTest, log ) v[ i ] = log( u[ i ] ); } - EXPECT_EQ( log( u ), v ); + // TODO: replace with EXPECT_EQ when nvcc accepts it + //EXPECT_EQ( log( u ), v ); + EXPECT_TRUE( log( u ) == v ); } TYPED_TEST( StaticVectorTest, log10 ) @@ -441,7 +468,9 @@ TYPED_TEST( StaticVectorTest, log2 ) v[ i ] = log2( u[ i ] ); } - EXPECT_EQ( log2( u ), v ); + // TODO: replace with EXPECT_EQ when nvcc accepts it + //EXPECT_EQ( log2( u ), v ); + EXPECT_TRUE( log2( u ) == v ); } TYPED_TEST( StaticVectorTest, exp ) @@ -456,7 +485,9 @@ TYPED_TEST( StaticVectorTest, exp ) v[ i ] = exp( u[ i ] ); } - EXPECT_EQ( exp( u ), v ); + // TODO: replace with EXPECT_EQ when nvcc accepts it + //EXPECT_EQ( exp( u ), v ); + EXPECT_TRUE( exp( u ) == v ); } TYPED_TEST( StaticVectorTest, sign ) @@ -471,7 +502,8 @@ TYPED_TEST( StaticVectorTest, sign ) v[ i ] = sign( u[ i ] ); } - EXPECT_EQ( sign( u ), v ); + // TODO: replace with EXPECT_EQ when nvcc accepts it + EXPECT_TRUE( sign( u ) == v ); } TYPED_TEST( StaticVectorTest, lpNorm ) @@ -490,6 +522,52 @@ TYPED_TEST( StaticVectorTest, lpNorm ) EXPECT_EQ( v.lpNorm( 2.0 ), expectedL2norm ); EXPECT_NEAR( v.lpNorm( 3.0 ), expectedL3norm, epsilon ); } + +TYPED_TEST( StaticVectorTest, verticalOperations ) +{ + using VectorType = typename TestFixture::VectorType; + + using RealType = typename VectorType::RealType; + constexpr int size = VectorType::size; + + VectorType u, v; + RealType sum_( 0.0 ), absSum( 
0.0 ), diffSum( 0.0 ), diffAbsSum( 0.0 ), + absMin( size + 10.0 ), absMax( -size - 10.0 ), + diffMin( 2 * size + 10.0 ), diffMax( - 2.0 * size - 10.0 ), + l2Norm( 0.0 ), l2NormDiff( 0.0 ); + for( int i = 0; i < size; i++ ) + { + const RealType aux = ( RealType )( i - size / 2 ) / ( RealType ) size; + u[ i, aux ]; + v[ i, -aux ]; + absMin = TNL::min( absMin, TNL::abs( aux ) ); + absMax = TNL::max( absMax, TNL::abs( aux ) ); + diffMin = TNL::min( diffMin, 2 * aux ); + diffMax = TNL::max( diffMax, 2 * aux ); + sum_ += aux; + absSum += TNL::abs( aux ); + diffSum += 2.0 * aux; + diffAbsSum += TNL::abs( 2.0* aux ); + l2Norm += aux * aux; + l2NormDiff += 4.0 * aux * aux; + } + l2Norm = TNL::sqrt( l2Norm ); + l2NormDiff = TNL::sqrt( l2NormDiff ); + + + EXPECT_EQ( min( u ), u[ 0 ] ); + EXPECT_EQ( max( u ), u[ size - 1 ] ); + EXPECT_NEAR( sum( u ), sum_, 2.0e-5 ); + EXPECT_EQ( min( abs( u ) ), absMin ); + EXPECT_EQ( max( abs( u ) ), absMax ); + EXPECT_EQ( min( u - v ), diffMin ); + EXPECT_EQ( max( u - v ), diffMax ); + EXPECT_NEAR( sum( u - v ), diffSum, 2.0e-5 ); + EXPECT_NEAR( sum( abs( u - v ) ), diffAbsSum, 2.0e-5 ); + EXPECT_NEAR( lpNorm( u, 2.0 ), l2Norm, 2.0e-5 ); + EXPECT_NEAR( lpNorm( u - v, 2.0 ), l2NormDiff, 2.0e-5 ); +} + #endif diff --git a/src/UnitTests/Containers/VectorTest-4.h b/src/UnitTests/Containers/VectorTest-4.h index 5a913c4e8..b3fcd8d68 100644 --- a/src/UnitTests/Containers/VectorTest-4.h +++ b/src/UnitTests/Containers/VectorTest-4.h @@ -218,7 +218,7 @@ TYPED_TEST( VectorTest, abs ) u.setElement( i, i ); v = -u; - EXPECT_EQ( abs( v ), u ); + EXPECT_TRUE( abs( v ) == u ); } #endif // HAVE_GTEST diff --git a/src/UnitTests/Containers/VectorTest-7.h b/src/UnitTests/Containers/VectorTest-7.h index 02f2a00b2..6a2c8b590 100644 --- a/src/UnitTests/Containers/VectorTest-7.h +++ b/src/UnitTests/Containers/VectorTest-7.h @@ -31,26 +31,51 @@ using namespace TNL::Arithmetics; // and large enough to require multiple CUDA blocks for reduction constexpr int 
VECTOR_TEST_SIZE = 500; -TYPED_TEST( VectorTest, min ) +TYPED_TEST( VectorTest, verticalOperations ) { using VectorType = typename TestFixture::VectorType; using ViewType = typename TestFixture::ViewType; using RealType = typename VectorType::RealType; const int size = VECTOR_TEST_SIZE; - VectorType _u( size ); - ViewType u( _u ); + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + RealType sum_( 0.0 ), absSum( 0.0 ), diffSum( 0.0 ), diffAbsSum( 0.0 ), + absMin( size + 10.0 ), absMax( -size - 10.0 ), + diffMin( 2 * size + 10.0 ), diffMax( - 2.0 * size - 10.0 ), + l2Norm( 0.0 ), l2NormDiff( 0.0 ); for( int i = 0; i < size; i++ ) { - u.setElement( i, ( RealType )( i - size / 2 ) / ( RealType ) size ); + const RealType aux = ( RealType )( i - size / 2 ) / ( RealType ) size; + u.setElement( i, aux ); + v.setElement( i, -aux ); + absMin = TNL::min( absMin, TNL::abs( aux ) ); + absMax = TNL::max( absMax, TNL::abs( aux ) ); + diffMin = TNL::min( diffMin, 2 * aux ); + diffMax = TNL::max( diffMax, 2 * aux ); + sum_ += aux; + absSum += TNL::abs( aux ); + diffSum += 2.0 * aux; + diffAbsSum += TNL::abs( 2.0* aux ); + l2Norm += aux * aux; + l2NormDiff += 4.0 * aux * aux; } + l2Norm = TNL::sqrt( l2Norm ); + l2NormDiff = TNL::sqrt( l2NormDiff ); EXPECT_EQ( min( u ), u.getElement( 0 ) ); - //for( int i = 0; i < size; i++ ) - // EXPECT_NEAR( acos( u ).getElement( i ), v.getElement( i ), 1.0e-6 ); + EXPECT_EQ( max( u ), u.getElement( size - 1 ) ); + EXPECT_NEAR( sum( u ), sum_, 2.0e-5 ); + EXPECT_EQ( min( abs( u ) ), absMin ); + EXPECT_EQ( max( abs( u ) ), absMax ); + EXPECT_EQ( min( u - v ), diffMin ); + EXPECT_EQ( max( u - v ), diffMax ); + EXPECT_NEAR( sum( u - v ), diffSum, 2.0e-5 ); + EXPECT_NEAR( sum( abs( u - v ) ), diffAbsSum, 2.0e-5 ); + EXPECT_NEAR( lpNorm( u, 2.0 ), l2Norm, 2.0e-5 ); + EXPECT_NEAR( lpNorm( u - v, 2.0 ), l2NormDiff, 2.0e-5 ); } - #endif // HAVE_GTEST -- GitLab From d6ab350cdedaedafbcb5e374469ab5618289d636 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 3 May 2019 18:52:14 +0200 Subject: [PATCH 28/93] Fixed StaticVectorTest. --- src/UnitTests/Containers/StaticVectorTest.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/UnitTests/Containers/StaticVectorTest.cpp b/src/UnitTests/Containers/StaticVectorTest.cpp index 1437864dc..4788a823d 100644 --- a/src/UnitTests/Containers/StaticVectorTest.cpp +++ b/src/UnitTests/Containers/StaticVectorTest.cpp @@ -538,8 +538,8 @@ TYPED_TEST( StaticVectorTest, verticalOperations ) for( int i = 0; i < size; i++ ) { const RealType aux = ( RealType )( i - size / 2 ) / ( RealType ) size; - u[ i, aux ]; - v[ i, -aux ]; + u[ i ] = aux; + v[ i ] = -aux; absMin = TNL::min( absMin, TNL::abs( aux ) ); absMax = TNL::max( absMax, TNL::abs( aux ) ); diffMin = TNL::min( diffMin, 2 * aux ); -- GitLab From 680ce68a9d3a6d636f50a35d5802590829eba29a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 6 May 2019 07:38:29 +0200 Subject: [PATCH 29/93] Implementing ET benchmark. 
--- src/Benchmarks/BLAS/vector-operations.h | 28 +++++++++++++++++++++++-- src/TNL/Containers/VectorView.h | 1 + 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/Benchmarks/BLAS/vector-operations.h b/src/Benchmarks/BLAS/vector-operations.h index e191b8fbb..bb672a7ec 100644 --- a/src/Benchmarks/BLAS/vector-operations.h +++ b/src/Benchmarks/BLAS/vector-operations.h @@ -31,8 +31,11 @@ bool benchmarkVectorOperations( Benchmark & benchmark, const long & size ) { - typedef Containers::Vector< Real, Devices::Host, Index > HostVector; - typedef Containers::Vector< Real, Devices::Cuda, Index > CudaVector; + using HostVector = Containers::Vector< Real, Devices::Host, Index >; + using CudaVector = Containers::Vector< Real, Devices::Cuda, Index >; + using HostView = Containers::VectorView< Real, Devices::Host, Index >; + using CudaView = Containers::VectorView< Real, Devices::Cuda, Index >; + using namespace std; double datasetSize = (double) size * sizeof( Real ) / oneGB; @@ -46,6 +49,9 @@ benchmarkVectorOperations( Benchmark & benchmark, deviceVector2.setSize( size ); #endif + HostView hostView( hostVector ), hostView2( hostVector2 ); + CudaView deviceView( deviceVector ), deviceView2( deviceVector2 ); + Real resultHost, resultDevice; #ifdef HAVE_CUDA @@ -89,10 +95,19 @@ benchmarkVectorOperations( Benchmark & benchmark, auto maxCuda = [&]() { resultDevice = deviceVector.max(); }; + auto maxHostET = [&]() { + resultHost = max( hostView ); + }; + auto maxCudaET = [&]() { + resultDevice = max( deviceView ); + }; + benchmark.setOperation( "max", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", maxHost ); + benchmark.time< Devices::Host >( reset1, "CPU ET", maxHostET ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", maxCuda ); + benchmark.time< Devices::Cuda >( reset1, "GPU ET", maxCudaET ); #endif @@ -115,6 +130,13 @@ benchmarkVectorOperations( Benchmark & benchmark, auto absMaxCuda = [&]() { resultDevice = deviceVector.absMax(); 
}; + auto absMaxHostET = [&]() { + resultHost = max( abs( hostView ) ); + }; + auto absMaxCudaET = [&]() { + resultDevice = max( abs( deviceView ) ); + }; + #ifdef HAVE_CUDA auto absMaxCublas = [&]() { int index = 0; @@ -126,8 +148,10 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif benchmark.setOperation( "absMax", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", absMaxHost ); + benchmark.time< Devices::Host >( reset1, "CPU ET", absMaxHostET ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", absMaxCuda ); + benchmark.time< Devices::Cuda >( reset1, "GPU ET", absMaxCudaET ); benchmark.time< Devices::Cuda >( reset1, "cuBLAS", absMaxCublas ); #endif diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index e53309f69..6b8626397 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -193,3 +193,4 @@ public: } // namespace TNL #include +#include -- GitLab From b26ea7675466105559e373e888ac238592174f4f Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Mon, 6 May 2019 13:36:12 +0200 Subject: [PATCH 30/93] Adding scalar product and benchmarks. 
--- src/Benchmarks/BLAS/vector-operations.h | 50 +++++- .../Expressions/ExpressionTemplates.h | 142 +++++++----------- .../Expressions/StaticExpressionTemplates.h | 57 +++++++ src/TNL/Containers/StaticVectorExpressions.h | 26 ++++ src/TNL/Containers/Vector.h | 14 +- src/TNL/Containers/Vector.hpp | 11 ++ src/TNL/Containers/VectorView.h | 8 + src/TNL/Containers/VectorViewExpressions.h | 83 ++++++---- src/TNL/Containers/VectorView_impl.h | 12 ++ src/UnitTests/Containers/VectorTest-7.h | 21 +++ 10 files changed, 302 insertions(+), 122 deletions(-) diff --git a/src/Benchmarks/BLAS/vector-operations.h b/src/Benchmarks/BLAS/vector-operations.h index bb672a7ec..f6006104f 100644 --- a/src/Benchmarks/BLAS/vector-operations.h +++ b/src/Benchmarks/BLAS/vector-operations.h @@ -117,10 +117,18 @@ benchmarkVectorOperations( Benchmark & benchmark, auto minCuda = [&]() { resultDevice = deviceVector.min(); }; + auto minHostET = [&]() { + resultHost = min( hostView ); + }; + auto minCudaET = [&]() { + resultDevice = min( deviceView ); + }; benchmark.setOperation( "min", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", minHost ); + benchmark.time< Devices::Host >( reset1, "CPU", minHostET ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", minCuda ); + benchmark.time< Devices::Cuda >( reset1, "GPU", minCudaET ); #endif @@ -136,7 +144,6 @@ benchmarkVectorOperations( Benchmark & benchmark, auto absMaxCudaET = [&]() { resultDevice = max( abs( deviceView ) ); }; - #ifdef HAVE_CUDA auto absMaxCublas = [&]() { int index = 0; @@ -162,6 +169,12 @@ benchmarkVectorOperations( Benchmark & benchmark, auto absMinCuda = [&]() { resultDevice = deviceVector.absMin(); }; + auto absMinHostET = [&]() { + resultHost = min( abs( hostView ) ); + }; + auto absMinCudaET = [&]() { + resultDevice = min( abs( deviceView ) ); + }; #ifdef HAVE_CUDA auto absMinCublas = [&]() { int index = 0; @@ -173,8 +186,10 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif 
benchmark.setOperation( "absMin", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", absMinHost ); + benchmark.time< Devices::Host >( reset1, "CPU", absMinHostET ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", absMinCuda ); + benchmark.time< Devices::Cuda >( reset1, "GPU", absMinCudaET ); benchmark.time< Devices::Cuda >( reset1, "cuBLAS", absMinCublas ); #endif @@ -185,10 +200,18 @@ benchmarkVectorOperations( Benchmark & benchmark, auto sumCuda = [&]() { resultDevice = deviceVector.sum(); }; + auto sumHostET = [&]() { + resultHost = sum( hostView ); + }; + auto sumCudaET = [&]() { + resultDevice = sum( deviceView ); + }; benchmark.setOperation( "sum", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", sumHost ); + benchmark.time< Devices::Host >( reset1, "CPU", sumHostET ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", sumCuda ); + benchmark.time< Devices::Cuda >( reset1, "GPU", sumCudaET ); #endif @@ -198,6 +221,12 @@ benchmarkVectorOperations( Benchmark & benchmark, auto l1normCuda = [&]() { resultDevice = deviceVector.lpNorm( 1.0 ); }; + auto l1normHostET = [&]() { + resultHost = lpNorm( hostView, 1.0 ); + }; + auto l1normCudaET = [&]() { + resultDevice = lpNorm( deviceView, 1.0 ); + }; #ifdef HAVE_CUDA auto l1normCublas = [&]() { cublasGasum( cublasHandle, size, @@ -207,8 +236,10 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif benchmark.setOperation( "l1 norm", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", l1normHost ); + benchmark.time< Devices::Host >( reset1, "CPU", l1normHostET ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", l1normCuda ); + benchmark.time< Devices::Cuda >( reset1, "GPU", l1normCudaET ); benchmark.time< Devices::Cuda >( reset1, "cuBLAS", l1normCublas ); #endif @@ -219,6 +250,12 @@ benchmarkVectorOperations( Benchmark & benchmark, auto l2normCuda = [&]() { resultDevice = deviceVector.lpNorm( 2.0 ); }; + auto l2normHostET = [&]() { + 
resultHost = lpNorm( hostView, 2.0 ); + }; + auto l2normCudaET = [&]() { + resultDevice = lpNorm( deviceView, 2.0 ); + }; #ifdef HAVE_CUDA auto l2normCublas = [&]() { cublasGnrm2( cublasHandle, size, @@ -228,8 +265,10 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif benchmark.setOperation( "l2 norm", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", l2normHost ); + benchmark.time< Devices::Host >( reset1, "CPU", l2normHostET ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", l2normCuda ); + benchmark.time< Devices::Cuda >( reset1, "GPU", l2normCudaET ); benchmark.time< Devices::Cuda >( reset1, "cuBLAS", l2normCublas ); #endif @@ -240,10 +279,19 @@ benchmarkVectorOperations( Benchmark & benchmark, auto l3normCuda = [&]() { resultDevice = deviceVector.lpNorm( 3.0 ); }; + auto l3normHostET = [&]() { + resultHost = lpNorm( hostView, 3.0 ); + }; + auto l3normCudaET = [&]() { + resultDevice = lpNorm( deviceView, 3.0 ); + }; + benchmark.setOperation( "l3 norm", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", l3normHost ); + benchmark.time< Devices::Host >( reset1, "CPU", l3normHostET ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", l3normCuda ); + benchmark.time< Devices::Cuda >( reset1, "GPU", l3normCudaET ); #endif diff --git a/src/TNL/Containers/Expressions/ExpressionTemplates.h b/src/TNL/Containers/Expressions/ExpressionTemplates.h index 280b2817e..adfab7b95 100644 --- a/src/TNL/Containers/Expressions/ExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/ExpressionTemplates.h @@ -278,7 +278,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, @@ -295,7 +294,6 @@ operator + ( const 
Containers::Expressions::BinaryExpressionTemplate< L1, L2, LO template< typename T1, typename T2, template< typename, typename > class Operation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >, typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, @@ -314,7 +312,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, @@ -333,7 +330,6 @@ template< typename L1, template< typename, typename > class LOperation, typename R1, template< typename > class ROperation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >, @@ -355,7 +351,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, @@ -372,7 +367,6 @@ operator - ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LO template< typename T1, typename T2, template< typename, typename > class Operation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >, typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, @@ -391,7 +385,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > 
-__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, @@ -410,7 +403,6 @@ template< typename L1, template< typename, typename > class LOperation, typename R1, template< typename > class ROperation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >, @@ -432,7 +424,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, @@ -449,7 +440,6 @@ operator * ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LO template< typename T1, typename T2, template< typename, typename > class Operation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >, typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, @@ -468,7 +458,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, @@ -487,7 +476,6 @@ template< typename L1, template< typename, typename > class LOperation, typename R1, template< typename > class ROperation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation 
>, typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >, @@ -509,7 +497,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, @@ -526,7 +513,6 @@ operator / ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LO template< typename T1, typename T2, template< typename, typename > class Operation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >, typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, @@ -545,7 +531,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, @@ -564,7 +549,6 @@ template< typename L1, template< typename, typename > class LOperation, typename R1, template< typename > class ROperation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >, @@ -586,7 +570,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, @@ -603,7 +586,6 @@ min ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, 
LOperatio template< typename T1, typename T2, template< typename, typename > class Operation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >, typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, @@ -622,7 +604,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, @@ -641,7 +622,6 @@ template< typename L1, template< typename, typename > class LOperation, typename R1, template< typename > class ROperation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >, @@ -663,7 +643,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, @@ -680,7 +659,6 @@ max( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation template< typename T1, typename T2, template< typename, typename > class Operation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >, typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType, @@ -699,7 +677,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ const 
Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >, @@ -718,7 +695,6 @@ template< typename L1, template< typename, typename > class LOperation, typename R1, template< typename > class ROperation > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >, @@ -740,7 +716,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ bool operator == ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) @@ -751,7 +726,6 @@ operator == ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, L template< typename T1, typename T2, template< typename, typename > class Operation > -__cuda_callable__ bool operator == ( const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) @@ -764,7 +738,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ bool operator == ( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) @@ -777,7 +750,6 @@ template< typename L1, template< typename, typename > class LOperation, typename R1, template< typename > class ROperation > -__cuda_callable__ bool operator == ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const typename Containers::Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) @@ -793,7 +765,6 @@ 
template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ bool operator != ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) @@ -804,7 +775,6 @@ operator != ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, L template< typename T1, typename T2, template< typename, typename > class Operation > -__cuda_callable__ bool operator != ( const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) @@ -817,7 +787,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ bool operator != ( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) @@ -830,7 +799,6 @@ template< typename L1, template< typename, typename > class LOperation, typename R1, template< typename > class ROperation > -__cuda_callable__ bool operator != ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) @@ -846,7 +814,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ bool operator < ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) @@ -857,7 +824,6 @@ operator < ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LO template< typename T1, typename T2, template< typename, typename > class Operation > -__cuda_callable__ bool operator < ( const Containers::Expressions::BinaryExpressionTemplate< T1, T2, 
Operation >& a, const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) @@ -870,7 +836,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ bool operator < ( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) @@ -883,7 +848,6 @@ template< typename L1, template< typename, typename > class LOperation, typename R1, template< typename > class ROperation > -__cuda_callable__ bool operator < ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) @@ -899,7 +863,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ bool operator <= ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) @@ -910,7 +873,6 @@ operator <= ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, L template< typename T1, typename T2, template< typename, typename > class Operation > -__cuda_callable__ bool operator <= ( const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) @@ -923,7 +885,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ bool operator <= ( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) @@ -936,7 +897,6 @@ template< typename L1, template< typename, typename > class LOperation, typename R1, template< typename > class ROperation > 
-__cuda_callable__ bool operator <= ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) @@ -952,7 +912,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ bool operator > ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) @@ -963,7 +922,6 @@ operator > ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LO template< typename T1, typename T2, template< typename, typename > class Operation > -__cuda_callable__ bool operator > ( const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) @@ -976,7 +934,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ bool operator > ( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) @@ -989,7 +946,6 @@ template< typename L1, template< typename, typename > class LOperation, typename R1, template< typename > class ROperation > -__cuda_callable__ bool operator > ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) @@ -1005,7 +961,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ bool operator >= ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) @@ -1016,7 +971,6 @@ operator >= ( const 
Containers::Expressions::BinaryExpressionTemplate< L1, L2, L template< typename T1, typename T2, template< typename, typename > class Operation > -__cuda_callable__ bool operator >= ( const Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) @@ -1029,7 +983,6 @@ template< typename L1, typename R1, typename R2, template< typename, typename > class ROperation > -__cuda_callable__ bool operator >= ( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) @@ -1042,7 +995,6 @@ template< typename L1, template< typename, typename > class LOperation, typename R1, template< typename > class ROperation > -__cuda_callable__ bool operator >= ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, const typename Containers::Expressions::UnaryExpressionTemplate< R1, ROperation >& b ) @@ -1059,7 +1011,6 @@ operator >= ( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, L template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Minus > @@ -1072,7 +1023,6 @@ operator -( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOp template< typename L1, template< typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Abs > @@ -1088,7 +1038,6 @@ operator -( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperati template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ const 
Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Abs > @@ -1101,7 +1050,6 @@ abs( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation template< typename L1, template< typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Abs > @@ -1117,7 +1065,6 @@ abs( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Sin > @@ -1130,7 +1077,6 @@ sin( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation template< typename L1, template< typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Sin > @@ -1146,7 +1092,6 @@ sin( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Cos > @@ -1159,7 +1104,6 @@ cos( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation template< typename L1, template< typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Cos > @@ -1175,7 +1119,6 @@ cos( const Containers::Expressions::UnaryExpressionTemplate< L1, 
LOperation >& a template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Tan > @@ -1188,7 +1131,6 @@ tan( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation template< typename L1, template< typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Tan > @@ -1204,7 +1146,6 @@ tan( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Sqrt > @@ -1217,7 +1158,6 @@ sqrt( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperatio template< typename L1, template< typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Sqrt > @@ -1233,7 +1173,6 @@ sqrt( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Cbrt > @@ -1246,7 +1185,6 @@ cbrt( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperatio template< typename L1, template< typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, 
LOperation >, Containers::Expressions::Cbrt > @@ -1263,7 +1201,6 @@ template< typename L1, typename L2, template< typename, typename > class LOperation, typename Real > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Pow > @@ -1279,7 +1216,6 @@ pow( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation template< typename L1, template< typename > class LOperation, typename Real > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Pow > @@ -1297,7 +1233,6 @@ pow( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Sin > @@ -1310,7 +1245,6 @@ floor( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperati template< typename L1, template< typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Floor > @@ -1326,7 +1260,6 @@ floor( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Ceil > @@ -1339,7 +1272,6 @@ ceil( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperatio template< typename L1, template< typename > class LOperation > -__cuda_callable__ const 
Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Ceil > @@ -1355,7 +1287,6 @@ sin( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Asin > @@ -1368,7 +1299,6 @@ asin( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperatio template< typename L1, template< typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Asin > @@ -1384,7 +1314,6 @@ asin( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Acos > @@ -1397,7 +1326,6 @@ cos( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation template< typename L1, template< typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Acos > @@ -1413,7 +1341,6 @@ acos( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Atan > @@ -1426,7 +1353,6 @@ tan( const 
Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation template< typename L1, template< typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Atan > @@ -1442,7 +1368,6 @@ atan( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Sinh > @@ -1455,7 +1380,6 @@ sinh( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperatio template< typename L1, template< typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Sinh > @@ -1471,7 +1395,6 @@ sinh( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Cosh > @@ -1484,7 +1407,6 @@ cosh( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperatio template< typename L1, template< typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Cosh > @@ -1500,7 +1422,6 @@ cosh( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< 
Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Tanh > @@ -1513,7 +1434,6 @@ cosh( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperatio template< typename L1, template< typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Tanh > @@ -1529,7 +1449,6 @@ tanh( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Log > @@ -1542,7 +1461,6 @@ log( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation template< typename L1, template< typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Log > @@ -1558,7 +1476,6 @@ log( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Log10 > @@ -1571,7 +1488,6 @@ log10( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperati template< typename L1, template< typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Log10 > @@ -1587,7 +1503,6 @@ log10( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& template< typename L1, typename 
L2, template< typename, typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Log2 > @@ -1600,7 +1515,6 @@ log2( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperatio template< typename L1, template< typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Log2 > @@ -1616,7 +1530,6 @@ log2( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& template< typename L1, typename L2, template< typename, typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >, Containers::Expressions::Exp > @@ -1629,7 +1542,6 @@ exp( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation template< typename L1, template< typename > class LOperation > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >, Containers::Expressions::Exp > @@ -1770,6 +1682,60 @@ binaryAnd( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperatio return ExpressionBinaryAnd( a ); } + +//// +// Scalar product +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +auto +operator,( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +-> decltype( TNL::sum( a * b ) ) +{ + return TNL::sum( a * b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +auto +operator,( const 
Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +-> decltype( TNL::sum( a * b ) ) +{ + return TNL::sum( a * b ); +} + +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +auto +operator,( const Containers::Expressions::UnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::BinaryExpressionTemplate< R1, R2, ROperation >& b ) +-> decltype( TNL::sum( a * b ) ) +{ + return TNL::sum( a * b ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +auto +operator,( const Containers::Expressions::BinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::UnaryExpressionTemplate< R1,ROperation >& b ) +-> decltype( TNL::sum( a * b ) ) +{ + return TNL::sum( a * b ); +} + //// // Output stream template< typename T1, diff --git a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h index a1d6ae639..6a97948bc 100644 --- a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h @@ -1824,4 +1824,61 @@ binaryOr( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOpe return StaticExpressionBinaryOr( a ); } +//// +// Scalar product +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +auto +operator,( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +-> decltype( TNL::sum( a * b ) ) +{ + 
return TNL::sum( a * b ); +} + +template< typename T1, + typename T2, + template< typename, typename > class Operation > +__cuda_callable__ +auto +operator,( const Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) +-> decltype( TNL::sum( a * b ) ) +{ + return TNL::sum( a * b ); +} + +template< typename L1, + template< typename > class LOperation, + typename R1, + typename R2, + template< typename, typename > class ROperation > +__cuda_callable__ +auto +operator,( const Containers::Expressions::StaticUnaryExpressionTemplate< L1, LOperation >& a, + const typename Containers::Expressions::StaticBinaryExpressionTemplate< R1, R2, ROperation >& b ) +-> decltype( TNL::sum( a * b ) ) +{ + return TNL::sum( a * b ); +} + +template< typename L1, + typename L2, + template< typename, typename > class LOperation, + typename R1, + template< typename > class ROperation > +__cuda_callable__ +auto +operator,( const Containers::Expressions::StaticBinaryExpressionTemplate< L1, L2, LOperation >& a, + const typename Containers::Expressions::StaticUnaryExpressionTemplate< R1,ROperation >& b ) +-> decltype( TNL::sum( a * b ) ) +{ + return TNL::sum( a * b ); +} + } // namespace TNL diff --git a/src/TNL/Containers/StaticVectorExpressions.h b/src/TNL/Containers/StaticVectorExpressions.h index 311cc07d2..9b6b78d64 100644 --- a/src/TNL/Containers/StaticVectorExpressions.h +++ b/src/TNL/Containers/StaticVectorExpressions.h @@ -588,6 +588,32 @@ binaryAnd( const Containers::StaticVector< Size, Real >& a ) return Containers::Expressions::StaticExpressionBinaryAnd( a ); } +//// +// Scalar product +template< int Size, typename Real, typename ET > +__cuda_callable__ +auto operator,( const Containers::StaticVector< Size, Real >& a, const ET& b ) +->decltype( TNL::sum( a * b ) ) +{ + return TNL::sum( a * b ); +} + +template< typename ET, int Size, typename Real > 
+__cuda_callable__ +auto operator,( const ET& a, const Containers::StaticVector< Size, Real >& b ) +->decltype( TNL::sum( a * b ) ) +{ + return TNL::sum( a * b ); +} + +template< typename Real1, int Size, typename Real2 > +__cuda_callable__ +auto operator,( const Containers::StaticVector< Size, Real1 >& a, const Containers::StaticVector< Size, Real2 >& b ) +->decltype( TNL::sum( a * b ) ) +{ + return TNL::sum( a * b ); +} + //// // TODO: Replace this with multiplication when its safe template< int Size, typename Real, typename ET > diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h index 457a556e2..084055607 100644 --- a/src/TNL/Containers/Vector.h +++ b/src/TNL/Containers/Vector.h @@ -18,7 +18,7 @@ namespace Containers { template< typename Real, typename Device, typename Index > class VectorView; - + /** * \brief This class extends TNL::Array with algebraic operations. * @@ -180,13 +180,13 @@ public: __cuda_callable__ const Real& operator[]( const Index& i ) const; Vector& operator = ( const Vector& v ); - + template< typename Real_, typename Device_, typename Index_ > Vector& operator = ( const Vector< Real_, Device_, Index_ >& v ); template< typename Real_, typename Device_, typename Index_ > Vector& operator = ( const VectorView< Real_, Device_, Index_ >& v ); - + template< typename VectorExpression > Vector& operator = ( const VectorExpression& expression ); @@ -226,6 +226,14 @@ public: template< typename Scalar > Vector& operator /= ( const Scalar c ); + /** + * \brief Scalar product + * @param v + * @return + */ + template< typename Vector_ > + Real operator, ( const Vector_& v ) const; + /** * \brief Returns the maximum value out of all vector elements. 
*/ diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index dc6fce8b2..26b34d803 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -329,6 +329,17 @@ Real Vector< Real, Device, Index >::min() const } +template< typename Real, + typename Device, + typename Index > + template< typename Vector_ > +Real Vector< Real, Device, Index >:: +operator,( const Vector_& v ) const +{ + static_assert( std::is_same< DeviceType, typename Vector_::DeviceType >::value, "Cannot compute product of vectors allocated on different devices." ); + return Algorithms::VectorOperations< Device >::getScalarProduct( *this, v ); +} + template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index 6b8626397..59dc29f83 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -127,6 +127,14 @@ public: template< typename Real_, typename Device_, typename Index_ > bool operator!=( const VectorView< Real_, Device_, Index_ >& v ); + /** + * \brief Scalar product + * @param v + * @return + */ + template< typename Vector_ > + NonConstReal operator, ( const Vector_& v ) const; + NonConstReal max() const; NonConstReal min() const; diff --git a/src/TNL/Containers/VectorViewExpressions.h b/src/TNL/Containers/VectorViewExpressions.h index 077a6651a..f83641f2f 100644 --- a/src/TNL/Containers/VectorViewExpressions.h +++ b/src/TNL/Containers/VectorViewExpressions.h @@ -515,36 +515,6 @@ sign( const Containers::VectorView< Real, Device, Index >& a ) return Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Sign >( a ); } - -//// -// TODO: Replace this with multiplication when its safe -template< typename Real, typename Device, typename Index, typename ET > -__cuda_callable__ -Containers::VectorView< Real, Device, Index > -Scale( const Containers::VectorView< Real, Device, Index >& a, 
const ET& b ) -{ - Containers::VectorView< Real, Device, Index > result = Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Multiplication >( a, b ); - return result; -} - -template< typename ET, typename Real, typename Device, typename Index > -__cuda_callable__ -Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Multiplication > -Scale( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) -{ - Containers::VectorView< Real, Device, Index > result = Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Multiplication >( a, b ); - return result; -} - -template< typename Real1, typename Real2, typename Device, typename Index > -__cuda_callable__ -Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Multiplication > -Scale( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) -{ - Containers::VectorView< Real1, Device, Index > result = Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Multiplication >( a, b ); - return result; -} - //// // Vertical operations - min template< typename Real, @@ -629,4 +599,57 @@ binaryAnd( const Containers::VectorView< Real, Device, Index >& a ) return Containers::Expressions::ExpressionBinaryAnd( a ); } +//// +// Scalar product +template< typename Real, typename Device, typename Index, typename ET > +__cuda_callable__ +Real operator,( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) +{ + return TNL::sum( a * b ); +} + +template< typename ET, typename Real, typename Device, 
typename Index > +__cuda_callable__ +Real operator,( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) +{ + return TNL::sum( a * b ); +} + +template< typename Real1, typename Real2, typename Device, typename Index > +__cuda_callable__ +auto operator,( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) +->decltype( TNL::sum( a * b ) ) +{ + return TNL::sum( a * b ); +} + +//// +// TODO: Replace this with multiplication when its safe +template< typename Real, typename Device, typename Index, typename ET > +__cuda_callable__ +Containers::VectorView< Real, Device, Index > +Scale( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) +{ + Containers::VectorView< Real, Device, Index > result = Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Multiplication >( a, b ); + return result; +} + +template< typename ET, typename Real, typename Device, typename Index > +__cuda_callable__ +Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Multiplication > +Scale( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) +{ + Containers::VectorView< Real, Device, Index > result = Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Multiplication >( a, b ); + return result; +} + +template< typename Real1, typename Real2, typename Device, typename Index > +__cuda_callable__ +Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Multiplication > +Scale( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) +{ + Containers::VectorView< Real1, Device, Index > result = 
Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Multiplication >( a, b ); + return result; +} + } // namespace TNL diff --git a/src/TNL/Containers/VectorView_impl.h b/src/TNL/Containers/VectorView_impl.h index d8b927d51..2433721b7 100644 --- a/src/TNL/Containers/VectorView_impl.h +++ b/src/TNL/Containers/VectorView_impl.h @@ -197,6 +197,18 @@ operator!=( const VectorView< Real_, Device_, Index_ >& v ) return !ArrayView< Real, Device, Index >::operator ==( v ); } +template< typename Real, + typename Device, + typename Index > + template< typename Vector_ > +typename VectorView< Real, Device, Index >::NonConstReal +VectorView< Real, Device, Index >:: +operator,( const Vector_& v ) const +{ + static_assert( std::is_same< DeviceType, typename Vector_::DeviceType >::value, "Cannot compute product of vectors allocated on different devices." ); + return Algorithms::VectorOperations< Device >::getScalarProduct( *this, v ); +} + template< typename Real, typename Device, diff --git a/src/UnitTests/Containers/VectorTest-7.h b/src/UnitTests/Containers/VectorTest-7.h index 6a2c8b590..4843383d7 100644 --- a/src/UnitTests/Containers/VectorTest-7.h +++ b/src/UnitTests/Containers/VectorTest-7.h @@ -76,6 +76,27 @@ TYPED_TEST( VectorTest, verticalOperations ) EXPECT_NEAR( lpNorm( u - v, 2.0 ), l2NormDiff, 2.0e-5 ); } +TYPED_TEST( VectorTest, scalarProduct ) +{ + using VectorType = typename TestFixture::VectorType; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + const int size = VECTOR_TEST_SIZE; + + VectorType _u( size ), _v( size ); + ViewType u( _u ), v( _v ); + RealType aux( 0.0 ); + for( int i = 0; i < size; i++ ) + { + const RealType x = i; + const RealType y = size / 2 - i; + u.setElement( i, x ); + v.setElement( i, y ); + aux += x * y; + } + EXPECT_NEAR( ( u, v ), aux, 1.0e-5 ); +} + #endif // 
HAVE_GTEST -- GitLab From 9ad7cd4dbddd5ae4563fa0ba505315d3c9cf9300 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Mon, 6 May 2019 13:51:51 +0200 Subject: [PATCH 31/93] Added test for blas to cmake. --- CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7d0f8ca5a..2659e8c69 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -317,6 +317,11 @@ if( ${WITH_TESTS} ) endif() endif() +FindBLAS() +if( BLAS_FOUND ) + set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_BLAS" ) + include_directories( ${JPEG_INCLUDE_DIRS} ) +endif() #if( BUILD_MPI ) # FIND_PATH( PETSC_INCLUDE_DIR petsc.h # /usr/include/petsc -- GitLab From 51642ea916c696630115291d0e61956df8945f1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 6 May 2019 20:40:27 +0200 Subject: [PATCH 32/93] Added BLAS wrappers for BLAS benchmark. --- CMakeLists.txt | 4 +- src/Benchmarks/BLAS/CMakeLists.txt | 1 + src/Benchmarks/BLAS/blasWrappers.h | 111 ++++++++++++++++++++++++ src/Benchmarks/BLAS/vector-operations.h | 2 + 4 files changed, 116 insertions(+), 2 deletions(-) create mode 100644 src/Benchmarks/BLAS/blasWrappers.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 2659e8c69..ccc0d9750 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -317,10 +317,10 @@ if( ${WITH_TESTS} ) endif() endif() -FindBLAS() +find_package( BLAS ) if( BLAS_FOUND ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_BLAS" ) - include_directories( ${JPEG_INCLUDE_DIRS} ) + #message( "BLAS: ${BLAS_LIBRARIES}" ) endif() #if( BUILD_MPI ) # FIND_PATH( PETSC_INCLUDE_DIR petsc.h diff --git a/src/Benchmarks/BLAS/CMakeLists.txt b/src/Benchmarks/BLAS/CMakeLists.txt index a6c99a994..4e5a1199b 100644 --- a/src/Benchmarks/BLAS/CMakeLists.txt +++ b/src/Benchmarks/BLAS/CMakeLists.txt @@ -4,5 +4,6 @@ if( BUILD_CUDA ) else() ADD_EXECUTABLE( tnl-benchmark-blas tnl-benchmark-blas.cpp ) endif() +target_link_libraries( tnl-benchmark-blas ${BLAS_LIBRARIES} ) install( TARGETS 
tnl-benchmark-blas RUNTIME DESTINATION bin ) diff --git a/src/Benchmarks/BLAS/blasWrappers.h b/src/Benchmarks/BLAS/blasWrappers.h new file mode 100644 index 000000000..c54981af4 --- /dev/null +++ b/src/Benchmarks/BLAS/blasWrappers.h @@ -0,0 +1,111 @@ +#pragma once + +#ifdef HAVE_CUDA + +#ifdef HAVE_BLAS + +#include + +inline int blasIgamax( int n, const float *x, int incx ) +{ + return cblas_isamax( n, x, incx ); +} + +inline int blasIgamax( int n, const double *x, int incx ) +{ + return cblas_idamax( n, x, incx ); +} + + +inline int blasIgamin( int n, const float *x, int incx ) +{ + return cblas_Isamin( n, x, incx ); +} + +inline int blasIgamin( int n, const double *x, int incx ) +{ + return cblas_Idamin( n, x, incx ); +} + + +inline float blasGasum( int n, const float *x, int incx ) +{ + return cblas_sasum( n, x, incx ); +} + +inline double blasGasum( int n, const double *x, int incx ) +{ + return cblas_dasum( n, x, incx ); +} + + +inline void +blasGaxpy( int n, const float *alpha, + const float *x, int incx, + float *y, int incy ) +{ + cblas_saxpy( n, alpha, x, incx, y, incy ); +} + +inline blasStatus_t +blasGaxpy( blasHandle_t int n, + const double *alpha, + const double *x, int incx, + double *y, int incy ) +{ + return cblas_Daxpy( n, alpha, x, incx, y, incy ); +} + + +inline blasStatus_t +blasGdot( blasHandle_t int n, + const float *x, int incx, + const float *y, int incy, + float *result ) +{ + return cblas_Sdot( n, x, incx, y, incy, result ); +} + +inline blasStatus_t +blasGdot( blasHandle_t int n, + const double *x, int incx, + const double *y, int incy, + double *result ) +{ + return cblas_Ddot( n, x, incx, y, incy, result ); +} + + +inline blasStatus_t +blasGnrm2( blasHandle_t int n, + const float *x, int incx, float *result ) +{ + return cblas_Snrm2( n, x, incx, result ); +} + +inline blasStatus_t +blasGnrm2( blasHandle_t int n, + const double *x, int incx, double *result ) +{ + return cblas_Dnrm2( n, x, incx, result ); +} + + +inline blasStatus_t 
+blasGscal( blasHandle_t int n, + const float *alpha, + float *x, int incx ) +{ + return cblas_Sscal( n, alpha, x, incx ); +} + +inline blasStatus_t +blasGscal( blasHandle_t int n, + const double *alpha, + double *x, int incx ) +{ + return cblas_Dscal( n, alpha, x, incx ); +} + +#endif +#endif diff --git a/src/Benchmarks/BLAS/vector-operations.h b/src/Benchmarks/BLAS/vector-operations.h index f6006104f..a732726db 100644 --- a/src/Benchmarks/BLAS/vector-operations.h +++ b/src/Benchmarks/BLAS/vector-operations.h @@ -18,6 +18,8 @@ #include +#include "blasWrappers.h" + #ifdef HAVE_CUDA #include "cublasWrappers.h" #endif -- GitLab From 2b7be771b644dd5ab70d758791adf573073b3ccb Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 7 May 2019 13:37:49 +0200 Subject: [PATCH 33/93] Implementing BLAS benchmark. --- CMakeLists.txt | 3 +- src/Benchmarks/BLAS/CMakeLists.txt | 4 +- src/Benchmarks/BLAS/blasWrappers.h | 75 +++++++----------- src/Benchmarks/BLAS/vector-operations.h | 91 +++++++++++++++++++--- src/TNL/Containers/VectorViewExpressions.h | 62 --------------- 5 files changed, 111 insertions(+), 124 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ccc0d9750..2447f8540 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -320,7 +320,7 @@ endif() find_package( BLAS ) if( BLAS_FOUND ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_BLAS" ) - #message( "BLAS: ${BLAS_LIBRARIES}" ) + set( HAVE_BLAS TRUE) endif() #if( BUILD_MPI ) # FIND_PATH( PETSC_INCLUDE_DIR petsc.h @@ -455,3 +455,4 @@ message( " CMAKE_SHARED_LINKER_FLAGS_DEBUG = ${CMAKE_SHARED_LINKER_FLAGS_DEBUG message( " CMAKE_SHARED_LINKER_FLAGS_RELEASE = ${CMAKE_SHARED_LINKER_FLAGS_RELEASE}" ) message( " CUDA_NVCC_FLAGS = ${CUDA_NVCC_FLAGS}" ) message( " GMP_LIBRARIES = ${GMP_LIBRARIES}" ) +message( " BLAS_libraries = ${BLAS_LIBRARIES}" ) diff --git a/src/Benchmarks/BLAS/CMakeLists.txt b/src/Benchmarks/BLAS/CMakeLists.txt index 4e5a1199b..96611d003 100644 --- a/src/Benchmarks/BLAS/CMakeLists.txt +++ 
b/src/Benchmarks/BLAS/CMakeLists.txt @@ -4,6 +4,8 @@ if( BUILD_CUDA ) else() ADD_EXECUTABLE( tnl-benchmark-blas tnl-benchmark-blas.cpp ) endif() -target_link_libraries( tnl-benchmark-blas ${BLAS_LIBRARIES} ) +if( HAVE_BLAS ) + target_link_libraries( tnl-benchmark-blas ${BLAS_LIBRARIES} ) +endif() install( TARGETS tnl-benchmark-blas RUNTIME DESTINATION bin ) diff --git a/src/Benchmarks/BLAS/blasWrappers.h b/src/Benchmarks/BLAS/blasWrappers.h index c54981af4..d1e0edff1 100644 --- a/src/Benchmarks/BLAS/blasWrappers.h +++ b/src/Benchmarks/BLAS/blasWrappers.h @@ -1,7 +1,5 @@ #pragma once -#ifdef HAVE_CUDA - #ifdef HAVE_BLAS #include @@ -17,15 +15,15 @@ inline int blasIgamax( int n, const double *x, int incx ) } -inline int blasIgamin( int n, const float *x, int incx ) +/*inline int blasIgamin( int n, const float *x, int incx ) { - return cblas_Isamin( n, x, incx ); + return cblas_isamin( n, x, incx ); } inline int blasIgamin( int n, const double *x, int incx ) { - return cblas_Idamin( n, x, incx ); -} + return cblas_idamin( n, x, incx ); +}*/ inline float blasGasum( int n, const float *x, int incx ) @@ -39,73 +37,54 @@ inline double blasGasum( int n, const double *x, int incx ) } -inline void -blasGaxpy( int n, const float *alpha, - const float *x, int incx, - float *y, int incy ) +inline void blasGaxpy( int n, const float alpha, + const float *x, int incx, + float *y, int incy ) { cblas_saxpy( n, alpha, x, incx, y, incy ); } -inline blasStatus_t -blasGaxpy( blasHandle_t int n, - const double *alpha, - const double *x, int incx, - double *y, int incy ) +inline void blasGaxpy( int n, const double alpha, + const double* x, int incx, + double *y, int incy ) { - return cblas_Daxpy( n, alpha, x, incx, y, incy ); + cblas_daxpy( n, alpha, x, incx, y, incy ); } -inline blasStatus_t -blasGdot( blasHandle_t int n, - const float *x, int incx, - const float *y, int incy, - float *result ) +inline float blasGdot( int n, const float* x, int incx, + const float* y, int incy ) { - 
return cblas_Sdot( n, x, incx, y, incy, result ); + return cblas_sdot( n, x, incx, y, incy ); } -inline blasStatus_t -blasGdot( blasHandle_t int n, - const double *x, int incx, - const double *y, int incy, - double *result ) +inline double blasGdot( int n, const double* x, int incx, + const double* y, int incy ) { - return cblas_Ddot( n, x, incx, y, incy, result ); + return cblas_ddot( n, x, incx, y, incy ); } -inline blasStatus_t -blasGnrm2( blasHandle_t int n, - const float *x, int incx, float *result ) +inline float blasGnrm2( int n, const float* x, int incx ) { - return cblas_Snrm2( n, x, incx, result ); + return cblas_snrm2( n, x, incx ); } -inline blasStatus_t -blasGnrm2( blasHandle_t int n, - const double *x, int incx, double *result ) +inline double blasGnrm2( int n, const double* x, int incx ) { - return cblas_Dnrm2( n, x, incx, result ); + return cblas_dnrm2( n, x, incx ); } -inline blasStatus_t -blasGscal( blasHandle_t int n, - const float *alpha, - float *x, int incx ) +inline void blasGscal( int n, const float alpha, + float* x, int incx ) { - return cblas_Sscal( n, alpha, x, incx ); + cblas_sscal( n, alpha, x, incx ); } -inline blasStatus_t -blasGscal( blasHandle_t int n, - const double *alpha, - double *x, int incx ) +inline void blasGscal( int n, const double alpha, + double* x, int incx ) { - return cblas_Dscal( n, alpha, x, incx ); + cblas_dscal( n, alpha, x, incx ); } - -#endif #endif diff --git a/src/Benchmarks/BLAS/vector-operations.h b/src/Benchmarks/BLAS/vector-operations.h index a732726db..32c843b43 100644 --- a/src/Benchmarks/BLAS/vector-operations.h +++ b/src/Benchmarks/BLAS/vector-operations.h @@ -18,7 +18,9 @@ #include +#ifdef HAVE_BLAS #include "blasWrappers.h" +#endif #ifdef HAVE_CUDA #include "cublasWrappers.h" @@ -87,10 +89,10 @@ benchmarkVectorOperations( Benchmark & benchmark, reset2(); }; - reset12(); - + //// + // Max auto maxHost = [&]() { resultHost = hostVector.max(); }; @@ -112,7 +114,8 @@ benchmarkVectorOperations( Benchmark 
& benchmark, benchmark.time< Devices::Cuda >( reset1, "GPU ET", maxCudaET ); #endif - + //// + // Min auto minHost = [&]() { resultHost = hostVector.min(); }; @@ -133,7 +136,8 @@ benchmarkVectorOperations( Benchmark & benchmark, benchmark.time< Devices::Cuda >( reset1, "GPU", minCudaET ); #endif - + //// + // Absmax auto absMaxHost = [&]() { resultHost = hostVector.absMax(); }; @@ -146,6 +150,12 @@ benchmarkVectorOperations( Benchmark & benchmark, auto absMaxCudaET = [&]() { resultDevice = max( abs( deviceView ) ); }; +#ifdef HAVE_BLAS + auto absMaxBlas = [&]() { + int index = blasIgamax( size, hostVector.getData(), 1 ); + resultHost = hostVector.getElement( index ); + }; +#endif #ifdef HAVE_CUDA auto absMaxCublas = [&]() { int index = 0; @@ -158,13 +168,15 @@ benchmarkVectorOperations( Benchmark & benchmark, benchmark.setOperation( "absMax", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", absMaxHost ); benchmark.time< Devices::Host >( reset1, "CPU ET", absMaxHostET ); + benchmark.time< Devices::Host >( reset1, "BLAS", absMaxBlas ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", absMaxCuda ); benchmark.time< Devices::Cuda >( reset1, "GPU ET", absMaxCudaET ); benchmark.time< Devices::Cuda >( reset1, "cuBLAS", absMaxCublas ); #endif - + //// + // Absmin auto absMinHost = [&]() { resultHost = hostVector.absMin(); }; @@ -177,6 +189,12 @@ benchmarkVectorOperations( Benchmark & benchmark, auto absMinCudaET = [&]() { resultDevice = min( abs( deviceView ) ); }; +/*#ifdef HAVE_BLAS + auto absMinBlas = [&]() { + int index = blasIgamin( size, hostVector.getData(), 1 ); + resultHost = hostVector.getElement( index ); + }; +#endif*/ #ifdef HAVE_CUDA auto absMinCublas = [&]() { int index = 0; @@ -188,14 +206,16 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif benchmark.setOperation( "absMin", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", absMinHost ); - benchmark.time< Devices::Host >( reset1, "CPU", absMinHostET ); + 
benchmark.time< Devices::Host >( reset1, "CPU ET", absMinHostET ); + //benchmark.time< Devices::Host >( reset1, "BLAS", absMinBlas ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", absMinCuda ); benchmark.time< Devices::Cuda >( reset1, "GPU", absMinCudaET ); benchmark.time< Devices::Cuda >( reset1, "cuBLAS", absMinCublas ); #endif - + //// + // Sum auto sumHost = [&]() { resultHost = hostVector.sum(); }; @@ -216,7 +236,8 @@ benchmarkVectorOperations( Benchmark & benchmark, benchmark.time< Devices::Cuda >( reset1, "GPU", sumCudaET ); #endif - + //// + // L1 norm auto l1normHost = [&]() { resultHost = hostVector.lpNorm( 1.0 ); }; @@ -245,7 +266,8 @@ benchmarkVectorOperations( Benchmark & benchmark, benchmark.time< Devices::Cuda >( reset1, "cuBLAS", l1normCublas ); #endif - + //// + // L2 norm auto l2normHost = [&]() { resultHost = hostVector.lpNorm( 2.0 ); }; @@ -258,6 +280,11 @@ benchmarkVectorOperations( Benchmark & benchmark, auto l2normCudaET = [&]() { resultDevice = lpNorm( deviceView, 2.0 ); }; +#ifdef HAVE_BLAS + auto l2normBlas = [&]() { + resultHost = blasGnrm2( size, hostVector.getData(), 1 ); + }; +#endif #ifdef HAVE_CUDA auto l2normCublas = [&]() { cublasGnrm2( cublasHandle, size, @@ -268,13 +295,15 @@ benchmarkVectorOperations( Benchmark & benchmark, benchmark.setOperation( "l2 norm", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", l2normHost ); benchmark.time< Devices::Host >( reset1, "CPU", l2normHostET ); + benchmark.time< Devices::Host >( reset1, "BLAS", l2normBlas ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", l2normCuda ); benchmark.time< Devices::Cuda >( reset1, "GPU", l2normCudaET ); benchmark.time< Devices::Cuda >( reset1, "cuBLAS", l2normCublas ); #endif - + //// + // L3 norm auto l3normHost = [&]() { resultHost = hostVector.lpNorm( 3.0 ); }; @@ -296,13 +325,26 @@ benchmarkVectorOperations( Benchmark & benchmark, benchmark.time< Devices::Cuda >( reset1, "GPU", l3normCudaET ); #endif - + //// 
+ // Scalar product auto scalarProductHost = [&]() { resultHost = hostVector.scalarProduct( hostVector2 ); }; auto scalarProductCuda = [&]() { resultDevice = deviceVector.scalarProduct( deviceVector2 ); }; + auto scalarProductHostET = [&]() { + resultHost = ( hostView, hostView2 ); + }; + auto scalarProductCudaET = [&]() { + resultDevice = ( deviceView, deviceView2 ); + }; + +#ifdef HAVE_BLAS + auto scalarProductBlas = [&]() { + resultHost = blasGdot( size, hostVector.getData(), 1, hostVector2.getData(), 1 ); + }; +#endif #ifdef HAVE_CUDA auto scalarProductCublas = [&]() { cublasGdot( cublasHandle, size, @@ -313,11 +355,16 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif benchmark.setOperation( "scalar product", 2 * datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", scalarProductHost ); + benchmark.time< Devices::Host >( reset1, "CPU ET", scalarProductHostET ); + benchmark.time< Devices::Host >( reset1, "BLAS", scalarProductBlas ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", scalarProductCuda ); + benchmark.time< Devices::Cuda >( reset1, "GPU ET", scalarProductCudaET ); benchmark.time< Devices::Cuda >( reset1, "cuBLAS", scalarProductCublas ); #endif + //// + // Prefix sum /* std::cout << "Benchmarking prefix-sum:" << std::endl; timer.reset(); @@ -348,6 +395,8 @@ benchmarkVectorOperations( Benchmark & benchmark, */ + //// + // Scalar multiplication auto multiplyHost = [&]() { hostVector *= 0.5; }; @@ -369,13 +418,28 @@ benchmarkVectorOperations( Benchmark & benchmark, benchmark.time< Devices::Cuda >( reset1, "cuBLAS", multiplyCublas ); #endif - + //// + // Vector addition auto addVectorHost = [&]() { hostVector.addVector( hostVector2 ); }; auto addVectorCuda = [&]() { deviceVector.addVector( deviceVector2 ); }; + auto addVectorHostET = [&]() { + hostView += hostView2; + }; + auto addVectorCudaET = [&]() { + deviceView += deviceView2; + }; +#ifdef HAVE_CUDA + auto addVectorBlas = [&]() { + const Real alpha = 1.0; + 
blasGaxpy( size, alpha, + deviceVector2.getData(), 1, + deviceVector.getData(), 1 ); + }; +#endif #ifdef HAVE_CUDA auto addVectorCublas = [&]() { const Real alpha = 1.0; @@ -387,8 +451,11 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif benchmark.setOperation( "vector addition", 3 * datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", addVectorHost ); + benchmark.time< Devices::Host >( reset1, "CPU ET", addVectorHostET ); + benchmark.time< Devices::Host >( reset1, "BLAS", addVectorBlas ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", addVectorCuda ); + benchmark.time< Devices::Cuda >( reset1, "GPU ET", addVectorCudaET ); benchmark.time< Devices::Cuda >( reset1, "cuBLAS", addVectorCublas ); #endif diff --git a/src/TNL/Containers/VectorViewExpressions.h b/src/TNL/Containers/VectorViewExpressions.h index f83641f2f..b57089fd7 100644 --- a/src/TNL/Containers/VectorViewExpressions.h +++ b/src/TNL/Containers/VectorViewExpressions.h @@ -24,7 +24,6 @@ namespace TNL { //// // Addition template< typename Real, typename Device, typename Index, typename ET > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Addition > operator+( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { @@ -32,7 +31,6 @@ operator+( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) } template< typename ET, typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Addition > operator+( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { @@ -40,7 +38,6 @@ operator+( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) } template< typename Real1, typename Real2, typename Device, typename Index > -__cuda_callable__ const 
Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Addition > operator+( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { @@ -50,7 +47,6 @@ operator+( const Containers::VectorView< Real1, Device, Index >& a, const Contai //// // Subtraction template< typename Real, typename Device, typename Index, typename ET > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Subtraction > operator-( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { @@ -58,7 +54,6 @@ operator-( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) } template< typename ET, typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Subtraction > operator-( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { @@ -66,7 +61,6 @@ operator-( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) } template< typename Real1, typename Real2, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Subtraction > operator-( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { @@ -76,7 +70,6 @@ operator-( const Containers::VectorView< Real1, Device, Index >& a, const Contai //// // Multiplication template< typename Real, typename Device, typename Index, typename ET > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, 
Index >, ET, Containers::Expressions::Multiplication > operator*( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { @@ -84,7 +77,6 @@ operator*( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) } template< typename ET, typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Multiplication > operator*( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { @@ -92,7 +84,6 @@ operator*( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) } template< typename Real1, typename Real2, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Multiplication > operator*( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { @@ -102,7 +93,6 @@ operator*( const Containers::VectorView< Real1, Device, Index >& a, const Contai //// // Division template< typename Real, typename Device, typename Index, typename ET > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Division > operator/( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { @@ -110,7 +100,6 @@ operator/( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) } template< typename ET, typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Division > operator/( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { @@ -118,7 +107,6 @@ operator/( const ET& a, const 
Containers::VectorView< Real, Device, Index >& b ) } template< typename Real1, typename Real2, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Division > operator/( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { @@ -128,7 +116,6 @@ operator/( const Containers::VectorView< Real1, Device, Index >& a, const Contai //// // Min template< typename Real, typename Device, typename Index, typename ET > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Min > min( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { @@ -136,7 +123,6 @@ min( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) } template< typename ET, typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Min > min( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { @@ -144,7 +130,6 @@ min( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) } template< typename Real1, typename Real2, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Min > min( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { @@ -154,7 +139,6 @@ min( const Containers::VectorView< Real1, Device, Index >& a, const Containers:: //// // Max template< typename Real, typename Device, typename Index, typename ET > -__cuda_callable__ const 
Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Max > max( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { @@ -162,7 +146,6 @@ max( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) } template< typename ET, typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Max > max( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { @@ -170,7 +153,6 @@ max( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) } template< typename Real1, typename Real2, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Max > max( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { @@ -180,21 +162,18 @@ max( const Containers::VectorView< Real1, Device, Index >& a, const Containers:: //// // Comparison operations - operator == template< typename Real, typename Device, typename Index, typename ET > -__cuda_callable__ bool operator==( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { return Containers::Expressions::ComparisonEQ( a, b ); } template< typename ET, typename Real, typename Device, typename Index > -__cuda_callable__ bool operator==( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { return Containers::Expressions::ComparisonEQ( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > -__cuda_callable__ bool operator==( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { return 
Containers::Expressions::ComparisonEQ( a, b ); @@ -203,21 +182,18 @@ bool operator==( const Containers::VectorView< Real1, Device, Index >& a, const //// // Comparison operations - operator != template< typename Real, typename Device, typename Index, typename ET > -__cuda_callable__ bool operator!=( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { return Containers::Expressions::ComparisonNE( a, b ); } template< typename ET, typename Real, typename Device, typename Index > -__cuda_callable__ bool operator!=( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { return Containers::Expressions::ComparisonNE( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > -__cuda_callable__ bool operator!=( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { return Containers::Expressions::ComparisonNE( a, b ); @@ -226,21 +202,18 @@ bool operator!=( const Containers::VectorView< Real1, Device, Index >& a, const //// // Comparison operations - operator < template< typename Real, typename Device, typename Index, typename ET > -__cuda_callable__ bool operator<( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { return Containers::Expressions::ComparisonLT( a, b ); } template< typename ET, typename Real, typename Device, typename Index > -__cuda_callable__ bool operator<( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { return Containers::Expressions::ComparisonLT( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > -__cuda_callable__ bool operator<( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { return Containers::Expressions::ComparisonLT( a, b ); @@ -249,21 +222,18 @@ bool operator<( const Containers::VectorView< Real1, Device, Index >& a, const C //// // Comparison operations - operator <= 
template< typename Real, typename Device, typename Index, typename ET > -__cuda_callable__ bool operator<=( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { return Containers::Expressions::ComparisonLE( a, b ); } template< typename ET, typename Real, typename Device, typename Index > -__cuda_callable__ bool operator<=( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { return Containers::Expressions::ComparisonLE( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > -__cuda_callable__ bool operator<=( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { return Containers::Expressions::ComparisonLE( a, b ); @@ -272,21 +242,18 @@ bool operator<=( const Containers::VectorView< Real1, Device, Index >& a, const //// // Comparison operations - operator > template< typename Real, typename Device, typename Index, typename ET > -__cuda_callable__ bool operator>( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { return Containers::Expressions::ComparisonGT( a, b ); } template< typename ET, typename Real, typename Device, typename Index > -__cuda_callable__ bool operator>( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { return Containers::Expressions::ComparisonGT( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > -__cuda_callable__ bool operator>( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { return Containers::Expressions::ComparisonGT( a, b ); @@ -295,21 +262,18 @@ bool operator>( const Containers::VectorView< Real1, Device, Index >& a, const C //// // Comparison operations - operator >= template< typename Real, typename Device, typename Index, typename ET > -__cuda_callable__ bool operator>=( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { return 
Containers::Expressions::ComparisonGE( a, b ); } template< typename ET, typename Real, typename Device, typename Index > -__cuda_callable__ bool operator>=( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { return Containers::Expressions::ComparisonGE( a, b ); } template< typename Real1, typename Real2, typename Device, typename Index > -__cuda_callable__ bool operator>=( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { return Containers::Expressions::ComparisonGE( a, b ); @@ -318,7 +282,6 @@ bool operator>=( const Containers::VectorView< Real1, Device, Index >& a, const //// // Minus template< typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Minus > operator-( const Containers::VectorView< Real, Device, Index >& a ) { @@ -328,7 +291,6 @@ operator-( const Containers::VectorView< Real, Device, Index >& a ) //// // Abs template< typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Abs > abs( const Containers::VectorView< Real, Device, Index >& a ) { @@ -338,7 +300,6 @@ abs( const Containers::VectorView< Real, Device, Index >& a ) //// // Sine template< typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Sin > sin( const Containers::VectorView< Real, Device, Index >& a ) { @@ -348,7 +309,6 @@ sin( const Containers::VectorView< Real, Device, Index >& a ) //// // Cosine template< typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, 
Containers::Expressions::Cos > cos( const Containers::VectorView< Real, Device, Index >& a ) { @@ -358,7 +318,6 @@ cos( const Containers::VectorView< Real, Device, Index >& a ) //// // Tangent template< typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Tan > tan( const Containers::VectorView< Real, Device, Index >& a ) { @@ -368,7 +327,6 @@ tan( const Containers::VectorView< Real, Device, Index >& a ) //// // Sqrt template< typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Sqrt > sqrt( const Containers::VectorView< Real, Device, Index >& a ) { @@ -378,7 +336,6 @@ sqrt( const Containers::VectorView< Real, Device, Index >& a ) //// // Cbrt template< typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Cbrt > cbrt( const Containers::VectorView< Real, Device, Index >& a ) { @@ -388,7 +345,6 @@ cbrt( const Containers::VectorView< Real, Device, Index >& a ) //// // Power template< typename Real, typename Device, typename Index, typename ExpType > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Pow, ExpType > pow( const Containers::VectorView< Real, Device, Index >& a, const ExpType& exp ) { @@ -398,7 +354,6 @@ pow( const Containers::VectorView< Real, Device, Index >& a, const ExpType& exp //// // Floor template< typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Floor > floor( const 
Containers::VectorView< Real, Device, Index >& a ) { @@ -408,7 +363,6 @@ floor( const Containers::VectorView< Real, Device, Index >& a ) //// // Ceil template< typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Ceil > ceil( const Containers::VectorView< Real, Device, Index >& a ) { @@ -418,7 +372,6 @@ ceil( const Containers::VectorView< Real, Device, Index >& a ) //// // Acos template< typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Acos > acos( const Containers::VectorView< Real, Device, Index >& a ) { @@ -428,7 +381,6 @@ acos( const Containers::VectorView< Real, Device, Index >& a ) //// // Asin template< typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Asin > asin( const Containers::VectorView< Real, Device, Index >& a ) { @@ -438,7 +390,6 @@ asin( const Containers::VectorView< Real, Device, Index >& a ) //// // Atan template< typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Atan > atan( const Containers::VectorView< Real, Device, Index >& a ) { @@ -448,7 +399,6 @@ atan( const Containers::VectorView< Real, Device, Index >& a ) //// // Cosh template< typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Cosh > cosh( const Containers::VectorView< Real, Device, Index >& a ) { @@ -458,7 +408,6 @@ cosh( const Containers::VectorView< Real, Device, Index 
>& a ) //// // Tanh template< typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Tanh > tanh( const Containers::VectorView< Real, Device, Index >& a ) { @@ -468,7 +417,6 @@ tanh( const Containers::VectorView< Real, Device, Index >& a ) //// // Log template< typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Log > log( const Containers::VectorView< Real, Device, Index >& a ) { @@ -478,7 +426,6 @@ log( const Containers::VectorView< Real, Device, Index >& a ) //// // Log10 template< typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Log10 > log10( const Containers::VectorView< Real, Device, Index >& a ) { @@ -488,7 +435,6 @@ log10( const Containers::VectorView< Real, Device, Index >& a ) //// // Log2 template< typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Log2 > log2( const Containers::VectorView< Real, Device, Index >& a ) { @@ -498,7 +444,6 @@ log2( const Containers::VectorView< Real, Device, Index >& a ) //// // Exp template< typename Real, typename Device, typename Index > -__cuda_callable__ const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Exp > exp( const Containers::VectorView< Real, Device, Index >& a ) { @@ -508,7 +453,6 @@ exp( const Containers::VectorView< Real, Device, Index >& a ) //// // Sign template< typename Real, typename Device, typename Index > -__cuda_callable__ const 
Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Sign > sign( const Containers::VectorView< Real, Device, Index >& a ) { @@ -602,21 +546,18 @@ binaryAnd( const Containers::VectorView< Real, Device, Index >& a ) //// // Scalar product template< typename Real, typename Device, typename Index, typename ET > -__cuda_callable__ Real operator,( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { return TNL::sum( a * b ); } template< typename ET, typename Real, typename Device, typename Index > -__cuda_callable__ Real operator,( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { return TNL::sum( a * b ); } template< typename Real1, typename Real2, typename Device, typename Index > -__cuda_callable__ auto operator,( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) ->decltype( TNL::sum( a * b ) ) { @@ -626,7 +567,6 @@ auto operator,( const Containers::VectorView< Real1, Device, Index >& a, const C //// // TODO: Replace this with multiplication when its safe template< typename Real, typename Device, typename Index, typename ET > -__cuda_callable__ Containers::VectorView< Real, Device, Index > Scale( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) { @@ -635,7 +575,6 @@ Scale( const Containers::VectorView< Real, Device, Index >& a, const ET& b ) } template< typename ET, typename Real, typename Device, typename Index > -__cuda_callable__ Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Multiplication > Scale( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) { @@ -644,7 +583,6 @@ Scale( const ET& a, const Containers::VectorView< Real, Device, Index >& b ) } template< typename Real1, typename Real2, typename Device, typename Index > -__cuda_callable__ 
Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Multiplication > Scale( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b ) { -- GitLab From b1db16cac6453164205311633dcf7c387ec5fd64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 7 May 2019 16:26:20 +0200 Subject: [PATCH 34/93] Fixed BLAS benchmark. --- src/Benchmarks/BLAS/vector-operations.h | 38 ++++++++++++------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/Benchmarks/BLAS/vector-operations.h b/src/Benchmarks/BLAS/vector-operations.h index 32c843b43..7d71d2ba3 100644 --- a/src/Benchmarks/BLAS/vector-operations.h +++ b/src/Benchmarks/BLAS/vector-operations.h @@ -130,10 +130,10 @@ benchmarkVectorOperations( Benchmark & benchmark, }; benchmark.setOperation( "min", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", minHost ); - benchmark.time< Devices::Host >( reset1, "CPU", minHostET ); + benchmark.time< Devices::Host >( reset1, "CPU ET", minHostET ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", minCuda ); - benchmark.time< Devices::Cuda >( reset1, "GPU", minCudaET ); + benchmark.time< Devices::Cuda >( reset1, "GPU ET", minCudaET ); #endif //// @@ -168,7 +168,7 @@ benchmarkVectorOperations( Benchmark & benchmark, benchmark.setOperation( "absMax", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", absMaxHost ); benchmark.time< Devices::Host >( reset1, "CPU ET", absMaxHostET ); - benchmark.time< Devices::Host >( reset1, "BLAS", absMaxBlas ); + benchmark.time< Devices::Host >( reset1, "CPU BLAS", absMaxBlas ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", absMaxCuda ); benchmark.time< Devices::Cuda >( reset1, "GPU ET", absMaxCudaET ); @@ -207,10 +207,10 @@ benchmarkVectorOperations( Benchmark & benchmark, 
benchmark.setOperation( "absMin", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", absMinHost ); benchmark.time< Devices::Host >( reset1, "CPU ET", absMinHostET ); - //benchmark.time< Devices::Host >( reset1, "BLAS", absMinBlas ); + //benchmark.time< Devices::Host >( reset1, "CPU BLAS", absMinBlas ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", absMinCuda ); - benchmark.time< Devices::Cuda >( reset1, "GPU", absMinCudaET ); + benchmark.time< Devices::Cuda >( reset1, "GPU ET", absMinCudaET ); benchmark.time< Devices::Cuda >( reset1, "cuBLAS", absMinCublas ); #endif @@ -230,10 +230,10 @@ benchmarkVectorOperations( Benchmark & benchmark, }; benchmark.setOperation( "sum", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", sumHost ); - benchmark.time< Devices::Host >( reset1, "CPU", sumHostET ); + benchmark.time< Devices::Host >( reset1, "CPU ET", sumHostET ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", sumCuda ); - benchmark.time< Devices::Cuda >( reset1, "GPU", sumCudaET ); + benchmark.time< Devices::Cuda >( reset1, "GPU ET", sumCudaET ); #endif //// @@ -259,10 +259,10 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif benchmark.setOperation( "l1 norm", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", l1normHost ); - benchmark.time< Devices::Host >( reset1, "CPU", l1normHostET ); + benchmark.time< Devices::Host >( reset1, "CPU ET", l1normHostET ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", l1normCuda ); - benchmark.time< Devices::Cuda >( reset1, "GPU", l1normCudaET ); + benchmark.time< Devices::Cuda >( reset1, "GPU ET", l1normCudaET ); benchmark.time< Devices::Cuda >( reset1, "cuBLAS", l1normCublas ); #endif @@ -294,11 +294,11 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif benchmark.setOperation( "l2 norm", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", l2normHost ); - benchmark.time< Devices::Host >( reset1, "CPU", l2normHostET 
); - benchmark.time< Devices::Host >( reset1, "BLAS", l2normBlas ); + benchmark.time< Devices::Host >( reset1, "CPU ET", l2normHostET ); + benchmark.time< Devices::Host >( reset1, "CPU BLAS", l2normBlas ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", l2normCuda ); - benchmark.time< Devices::Cuda >( reset1, "GPU", l2normCudaET ); + benchmark.time< Devices::Cuda >( reset1, "GPU ET", l2normCudaET ); benchmark.time< Devices::Cuda >( reset1, "cuBLAS", l2normCublas ); #endif @@ -319,10 +319,10 @@ benchmarkVectorOperations( Benchmark & benchmark, benchmark.setOperation( "l3 norm", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", l3normHost ); - benchmark.time< Devices::Host >( reset1, "CPU", l3normHostET ); + benchmark.time< Devices::Host >( reset1, "CPU ET", l3normHostET ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", l3normCuda ); - benchmark.time< Devices::Cuda >( reset1, "GPU", l3normCudaET ); + benchmark.time< Devices::Cuda >( reset1, "GPU ET", l3normCudaET ); #endif //// @@ -356,7 +356,7 @@ benchmarkVectorOperations( Benchmark & benchmark, benchmark.setOperation( "scalar product", 2 * datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", scalarProductHost ); benchmark.time< Devices::Host >( reset1, "CPU ET", scalarProductHostET ); - benchmark.time< Devices::Host >( reset1, "BLAS", scalarProductBlas ); + benchmark.time< Devices::Host >( reset1, "CPU BLAS", scalarProductBlas ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", scalarProductCuda ); benchmark.time< Devices::Cuda >( reset1, "GPU ET", scalarProductCudaET ); @@ -432,12 +432,12 @@ benchmarkVectorOperations( Benchmark & benchmark, auto addVectorCudaET = [&]() { deviceView += deviceView2; }; -#ifdef HAVE_CUDA +#ifdef HAVE_BLAS auto addVectorBlas = [&]() { const Real alpha = 1.0; blasGaxpy( size, alpha, - deviceVector2.getData(), 1, - deviceVector.getData(), 1 ); + hostVector2.getData(), 1, + hostVector.getData(), 1 ); }; #endif 
#ifdef HAVE_CUDA @@ -452,7 +452,7 @@ benchmarkVectorOperations( Benchmark & benchmark, benchmark.setOperation( "vector addition", 3 * datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", addVectorHost ); benchmark.time< Devices::Host >( reset1, "CPU ET", addVectorHostET ); - benchmark.time< Devices::Host >( reset1, "BLAS", addVectorBlas ); + benchmark.time< Devices::Host >( reset1, "CPU BLAS", addVectorBlas ); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", addVectorCuda ); benchmark.time< Devices::Cuda >( reset1, "GPU ET", addVectorCudaET ); -- GitLab From c81fb4ccb25f79c9d34f95ad74979d940ee88d09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 7 May 2019 17:52:32 +0200 Subject: [PATCH 35/93] Fixed vector addition and added vector addition benchmark to BLAS benchmark. --- src/Benchmarks/BLAS/vector-operations.h | 172 ++++++++++++++++-- .../Algorithms/VectorOperationsCuda_impl.h | 19 +- 2 files changed, 172 insertions(+), 19 deletions(-) diff --git a/src/Benchmarks/BLAS/vector-operations.h b/src/Benchmarks/BLAS/vector-operations.h index 7d71d2ba3..1ae37f3d2 100644 --- a/src/Benchmarks/BLAS/vector-operations.h +++ b/src/Benchmarks/BLAS/vector-operations.h @@ -44,17 +44,17 @@ benchmarkVectorOperations( Benchmark & benchmark, double datasetSize = (double) size * sizeof( Real ) / oneGB; - HostVector hostVector, hostVector2; - CudaVector deviceVector, deviceVector2; - hostVector.setSize( size ); - hostVector2.setSize( size ); + HostVector hostVector( size ), hostVector2( size ), hostVector3( size ), hostVector4( size ); + CudaVector deviceVector, deviceVector2, deviceVector3, deviceVector4; #ifdef HAVE_CUDA deviceVector.setSize( size ); deviceVector2.setSize( size ); + deviceVector3.setSize( size ); + deviceVector4.setSize( size ); #endif - HostView hostView( hostVector ), hostView2( hostVector2 ); - CudaView deviceView( deviceVector ), deviceView2( deviceVector2 ); + HostView hostView( hostVector ), hostView2( 
hostVector2 ), hostView3( hostVector3 ), hostView4( hostVector4 ); + CudaView deviceView( deviceVector ), deviceView2( deviceVector2 ), deviceView3( deviceVector3 ), deviceView4( deviceVector4 ); Real resultHost, resultDevice; @@ -84,12 +84,28 @@ benchmarkVectorOperations( Benchmark & benchmark, deviceVector2.setValue( 1.0 ); #endif }; - auto reset12 = [&]() { + auto reset3 = [&]() { + hostVector3.setValue( 1.0 ); +#ifdef HAVE_CUDA + deviceVector3.setValue( 1.0 ); +#endif + }; + auto reset4 = [&]() { + hostVector4.setValue( 1.0 ); +#ifdef HAVE_CUDA + deviceVector4.setValue( 1.0 ); +#endif + }; + + + auto resetAll = [&]() { reset1(); reset2(); + reset3(); + reset4(); }; - reset12(); + resetAll(); //// // Max @@ -168,7 +184,9 @@ benchmarkVectorOperations( Benchmark & benchmark, benchmark.setOperation( "absMax", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", absMaxHost ); benchmark.time< Devices::Host >( reset1, "CPU ET", absMaxHostET ); +#ifdef HAVE_BLAS benchmark.time< Devices::Host >( reset1, "CPU BLAS", absMaxBlas ); +#endif #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", absMaxCuda ); benchmark.time< Devices::Cuda >( reset1, "GPU ET", absMaxCudaET ); @@ -250,6 +268,11 @@ benchmarkVectorOperations( Benchmark & benchmark, auto l1normCudaET = [&]() { resultDevice = lpNorm( deviceView, 1.0 ); }; +#ifdef HAVE_BLAS + auto l1normBlas = [&]() { + resultHost = blasGasum( size, hostVector.getData(), 1 ); + }; +#endif #ifdef HAVE_CUDA auto l1normCublas = [&]() { cublasGasum( cublasHandle, size, @@ -260,6 +283,9 @@ benchmarkVectorOperations( Benchmark & benchmark, benchmark.setOperation( "l1 norm", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", l1normHost ); benchmark.time< Devices::Host >( reset1, "CPU ET", l1normHostET ); +#ifdef HAVE_BLAS + benchmark.time< Devices::Host >( reset1, "CPU BLAS", l1normBlas ); +#endif #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", l1normCuda ); benchmark.time< Devices::Cuda >( 
reset1, "GPU ET", l1normCudaET ); @@ -295,7 +321,9 @@ benchmarkVectorOperations( Benchmark & benchmark, benchmark.setOperation( "l2 norm", datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", l2normHost ); benchmark.time< Devices::Host >( reset1, "CPU ET", l2normHostET ); +#ifdef HAVE_BLAS benchmark.time< Devices::Host >( reset1, "CPU BLAS", l2normBlas ); +#endif #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", l2normCuda ); benchmark.time< Devices::Cuda >( reset1, "GPU ET", l2normCudaET ); @@ -356,7 +384,9 @@ benchmarkVectorOperations( Benchmark & benchmark, benchmark.setOperation( "scalar product", 2 * datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", scalarProductHost ); benchmark.time< Devices::Host >( reset1, "CPU ET", scalarProductHostET ); +#ifdef HAVE_BLAS benchmark.time< Devices::Host >( reset1, "CPU BLAS", scalarProductBlas ); +#endif #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset1, "GPU", scalarProductCuda ); benchmark.time< Devices::Cuda >( reset1, "GPU ET", scalarProductCudaET ); @@ -450,15 +480,129 @@ benchmarkVectorOperations( Benchmark & benchmark, }; #endif benchmark.setOperation( "vector addition", 3 * datasetSize ); - benchmark.time< Devices::Host >( reset1, "CPU", addVectorHost ); - benchmark.time< Devices::Host >( reset1, "CPU ET", addVectorHostET ); - benchmark.time< Devices::Host >( reset1, "CPU BLAS", addVectorBlas ); + benchmark.time< Devices::Host >( resetAll, "CPU", addVectorHost ); + benchmark.time< Devices::Host >( resetAll, "CPU ET", addVectorHostET ); +#ifdef HAVE_BLAS + benchmark.time< Devices::Host >( resetAll, "CPU BLAS", addVectorBlas ); +#endif +#ifdef HAVE_CUDA + benchmark.time< Devices::Cuda >( resetAll, "GPU", addVectorCuda ); + benchmark.time< Devices::Cuda >( resetAll, "GPU ET", addVectorCudaET ); + benchmark.time< Devices::Cuda >( resetAll, "cuBLAS", addVectorCublas ); +#endif + + //// + // Two vectors addition + auto addTwoVectorsHost = [&]() { + hostVector.addVector( 
hostVector2 ); + hostVector.addVector( hostVector3 ); + }; + auto addTwoVectorsCuda = [&]() { + deviceVector.addVector( deviceVector2 ); + deviceVector.addVector( deviceVector3 ); + }; + auto addTwoVectorsHostET = [&]() { + hostView += hostView2 + hostView3; + }; + auto addTwoVectorsCudaET = [&]() { + deviceView += deviceView2 + deviceView3; + }; +#ifdef HAVE_BLAS + auto addTwoVectorsBlas = [&]() { + const Real alpha = 1.0; + blasGaxpy( size, alpha, + hostVector2.getData(), 1, + hostVector.getData(), 1 ); + blasGaxpy( size, alpha, + hostVector3.getData(), 1, + hostVector.getData(), 1 ); + }; +#endif +#ifdef HAVE_CUDA + auto addTwoVectorsCublas = [&]() { + const Real alpha = 1.0; + cublasGaxpy( cublasHandle, size, + &alpha, + deviceVector2.getData(), 1, + deviceVector.getData(), 1 ); + cublasGaxpy( cublasHandle, size, + &alpha, + deviceVector3.getData(), 1, + deviceVector.getData(), 1 ); + }; +#endif + benchmark.setOperation( "two vectors addition", 4 * datasetSize ); + benchmark.time< Devices::Host >( resetAll, "CPU", addTwoVectorsHost ); + benchmark.time< Devices::Host >( resetAll, "CPU ET", addTwoVectorsHostET ); +#ifdef HAVE_BLAS + benchmark.time< Devices::Host >( resetAll, "CPU BLAS", addTwoVectorsBlas ); +#endif #ifdef HAVE_CUDA - benchmark.time< Devices::Cuda >( reset1, "GPU", addVectorCuda ); - benchmark.time< Devices::Cuda >( reset1, "GPU ET", addVectorCudaET ); - benchmark.time< Devices::Cuda >( reset1, "cuBLAS", addVectorCublas ); + benchmark.time< Devices::Cuda >( resetAll, "GPU", addTwoVectorsCuda ); + benchmark.time< Devices::Cuda >( resetAll, "GPU ET", addTwoVectorsCudaET ); + benchmark.time< Devices::Cuda >( resetAll, "cuBLAS", addTwoVectorsCublas ); #endif + //// + // Three vectors addition + auto addThreeVectorsHost = [&]() { + hostVector.addVector( hostVector2 ); + hostVector.addVector( hostVector3 ); + hostVector.addVector( hostVector4 ); + }; + auto addThreeVectorsCuda = [&]() { + deviceVector.addVector( deviceVector2 ); + 
deviceVector.addVector( deviceVector3 ); + deviceVector.addVector( deviceVector4 ); + }; + auto addThreeVectorsHostET = [&]() { + hostView += hostView2 + hostView3 + hostView4; + }; + auto addThreeVectorsCudaET = [&]() { + deviceView += deviceView2 + deviceView3 + deviceView4; + }; +#ifdef HAVE_BLAS + auto addThreeVectorsBlas = [&]() { + const Real alpha = 1.0; + blasGaxpy( size, alpha, + hostVector2.getData(), 1, + hostVector.getData(), 1 ); + blasGaxpy( size, alpha, + hostVector3.getData(), 1, + hostVector.getData(), 1 ); + blasGaxpy( size, alpha, + hostVector4.getData(), 1, + hostVector.getData(), 1 ); + }; +#endif +#ifdef HAVE_CUDA + auto addThreeVectorsCublas = [&]() { + const Real alpha = 1.0; + cublasGaxpy( cublasHandle, size, + &alpha, + deviceVector2.getData(), 1, + deviceVector.getData(), 1 ); + cublasGaxpy( cublasHandle, size, + &alpha, + deviceVector3.getData(), 1, + deviceVector.getData(), 1 ); + cublasGaxpy( cublasHandle, size, + &alpha, + deviceVector4.getData(), 1, + deviceVector.getData(), 1 ); + }; +#endif + benchmark.setOperation( "three vectors addition", 5 * datasetSize ); + benchmark.time< Devices::Host >( resetAll, "CPU", addThreeVectorsHost ); + benchmark.time< Devices::Host >( resetAll, "CPU ET", addThreeVectorsHostET ); +#ifdef HAVE_BLAS + benchmark.time< Devices::Host >( resetAll, "CPU BLAS", addThreeVectorsBlas ); +#endif +#ifdef HAVE_CUDA + benchmark.time< Devices::Cuda >( resetAll, "GPU", addThreeVectorsCuda ); + benchmark.time< Devices::Cuda >( resetAll, "GPU ET", addThreeVectorsCudaET ); + benchmark.time< Devices::Cuda >( resetAll, "cuBLAS", addThreeVectorsCublas ); +#endif #ifdef HAVE_CUDA cublasDestroy( cublasHandle ); diff --git a/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h b/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h index ca7ce908a..e8e6555cf 100644 --- a/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h +++ b/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h @@ -403,18 +403,27 @@ 
vectorAddVectorCudaKernel( Real1* y, template< typename Vector1, typename Vector2, typename Scalar1, typename Scalar2 > void VectorOperations< Devices::Cuda >:: -addVector( Vector1& y, +addVector( Vector1& _y, const Vector2& x, const Scalar1 alpha, const Scalar2 thisMultiplicator ) { TNL_ASSERT_GT( x.getSize(), 0, "Vector size must be positive." ); - TNL_ASSERT_EQ( x.getSize(), y.getSize(), "The vector sizes must be the same." ); + TNL_ASSERT_EQ( x.getSize(), _y.getSize(), "The vector sizes must be the same." ); #ifdef HAVE_CUDA - typedef typename Vector1::IndexType Index; + using IndexType = typename Vector1::IndexType; + using RealType = typename Vector1::RealType; + + RealType* y = _y.getData(); + auto add1 = [=] __cuda_callable__ ( IndexType i ) { y[ i ] += alpha * x[ i ]; }; + auto add2 = [=] __cuda_callable__ ( IndexType i ) { y[ i ] = thisMultiplicator * y[ i ] + alpha * x[ i ]; }; - const Index& size = x.getSize(); + if( thisMultiplicator == 1.0 ) + ParallelFor< Devices::Cuda >::exec( ( IndexType ) 0, _y.getSize(), add1 ); + else + ParallelFor< Devices::Cuda >::exec( ( IndexType ) 0, _y.getSize(), add2 ); + /*const Index& size = x.getSize(); dim3 cudaBlockSize( 256 ); dim3 cudaBlocks; cudaBlocks.x = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ) ); @@ -423,7 +432,7 @@ addVector( Vector1& y, x.getData(), size, alpha, - thisMultiplicator); + thisMultiplicator);*/ TNL_CHECK_CUDA_DEVICE; #else throw Exceptions::CudaSupportMissing(); -- GitLab From 7a51ba8aaba59163b55b1661372592e01f1e1017 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 7 May 2019 18:13:14 +0200 Subject: [PATCH 36/93] Added option for turning off build with BLAS. 
--- CMakeLists.txt | 10 ++++++---- build | 7 ++++++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2447f8540..f179cc30a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -317,10 +317,12 @@ if( ${WITH_TESTS} ) endif() endif() -find_package( BLAS ) -if( BLAS_FOUND ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_BLAS" ) - set( HAVE_BLAS TRUE) +if( ${WITH_BLAS} ) + find_package( BLAS ) + if( BLAS_FOUND ) + set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_BLAS" ) + set( HAVE_BLAS TRUE) + endif() endif() #if( BUILD_MPI ) # FIND_PATH( PETSC_INCLUDE_DIR petsc.h diff --git a/build b/build index c009a2608..989e63ad7 100755 --- a/build +++ b/build @@ -28,6 +28,7 @@ WITH_EXAMPLES="yes" WITH_PYTHON="yes" WITH_TOOLS="yes" WITH_BENCHMARKS="yes" +WITH_BLAS="yes" WITH_TEMPLATE_INSTANTIATION="no" INSTANTIATE_LONG_INT="no" @@ -63,6 +64,7 @@ do --with-examples=* ) WITH_EXAMPLES="${option#*=}" ;; --with-tools=* ) WITH_TOOLS="${option#*=}" ;; --with-benchmarks=* ) WITH_BENCHMARKS="${option#*=}" ;; + --with-blas=* ) WITH_BLAS="${option#*=}" ;; --with-python=* ) WITH_PYTHON="${option#*=}" ;; --with-templates-instantiation=* ) WITH_TEMPLATE_INSTANTIATION="${option#*=}" ;; --instantiate-long-int=* ) INSTANTIATE_LONG_INT="${option#*=}" ;; @@ -102,6 +104,8 @@ if [[ ${HELP} == "yes" ]]; then echo " --with-examples=yes/no Compile the 'examples' directory. 'yes' by default." echo " --with-tools=yes/no Compile the 'src/Tools' directory. 'yes' by default." echo " --with-python=yes/no Compile the Python bindings. 'yes' by default." + echo " --with-benchmarks=yes/no Compile the 'src/Benchmarks' directory. 'yes' by default." + echo " --with-blas=yes/no Compile the BLAS support - for benchmark purposes only. 'yes' by default." echo " --with-templates-instantiation=yes/no Precompiles some TNL templates during the build. 'no' by default." echo " --cmake=CMAKE Path to cmake. 'cmake' by default." echo " --verbose It enables verbose build." 
@@ -172,8 +176,9 @@ cmake_command=( -DWITH_COVERAGE=${WITH_COVERAGE} -DWITH_EXAMPLES=${WITH_EXAMPLES} -DWITH_TOOLS=${WITH_TOOLS} - -DWITH_BENCHMARKS=${WITH_BENCHMARKS} -DWITH_PYTHON=${WITH_PYTHON} + -DWITH_BENCHMARKS=${WITH_BENCHMARKS} + -DWITH_BLAS=${WITH_BLAS} -DDCMTK_DIR=${DCMTK_DIR} -DWITH_TEMPLATE_INSTANTIATION=${WITH_TEMPLATE_INSTANTIATION} -DINSTANTIATE_FLOAT=${INSTANTIATE_FLOAT} -- GitLab From 7a49e4780f22b667d8280ad1ad66d2f0707b165e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 7 May 2019 23:06:20 +0200 Subject: [PATCH 37/93] Optimized CUDA reduction by decreasing desired grid size. --- src/TNL/Containers/Algorithms/CudaReductionKernel.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/TNL/Containers/Algorithms/CudaReductionKernel.h b/src/TNL/Containers/Algorithms/CudaReductionKernel.h index 3ef43a055..d7a711cc7 100644 --- a/src/TNL/Containers/Algorithms/CudaReductionKernel.h +++ b/src/TNL/Containers/Algorithms/CudaReductionKernel.h @@ -195,11 +195,16 @@ struct CudaReductionKernelLauncher // where blocksPerMultiprocessor is determined according to the number of // available registers on the multiprocessor. // On Tesla K40c, desGridSize = 8 * 15 = 120. + // + // Update: + // It seems to be better to map only one CUDA block per one multiprocessor or maybe + // just slightly more. Therefore we omit blocksdPerMultiprocessor in the following. 
CudaReductionKernelLauncher( const Index size ) : activeDevice( Devices::CudaDeviceInfo::getActiveDevice() ), blocksdPerMultiprocessor( Devices::CudaDeviceInfo::getRegistersPerMultiprocessor( activeDevice ) / ( Reduction_maxThreadsPerBlock * Reduction_registersPerThread ) ), - desGridSize( blocksdPerMultiprocessor * Devices::CudaDeviceInfo::getCudaMultiprocessors( activeDevice ) ), + //desGridSize( blocksdPerMultiprocessor * Devices::CudaDeviceInfo::getCudaMultiprocessors( activeDevice ) ), + desGridSize( Devices::CudaDeviceInfo::getCudaMultiprocessors( activeDevice ) ), originalSize( size ) { } -- GitLab From 61f8e317c6d03c0257d9391dca6aca58e5d27a83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Wed, 8 May 2019 22:14:20 +0200 Subject: [PATCH 38/93] Implementing reduction with argument. --- .../Algorithms/CudaReductionKernel.h | 470 ++++++++++++++---- src/TNL/Containers/Algorithms/Reduction.h | 112 +++-- src/TNL/Containers/Algorithms/Reduction.hpp | 260 +++++++++- .../Expressions/VerticalOperations.h | 82 +++ src/TNL/Containers/StaticVectorExpressions.h | 16 + src/TNL/Containers/VectorViewExpressions.h | 18 + src/UnitTests/Containers/VectorTest-7.h | 22 +- 7 files changed, 810 insertions(+), 170 deletions(-) diff --git a/src/TNL/Containers/Algorithms/CudaReductionKernel.h b/src/TNL/Containers/Algorithms/CudaReductionKernel.h index d7a711cc7..70c3fbab0 100644 --- a/src/TNL/Containers/Algorithms/CudaReductionKernel.h +++ b/src/TNL/Containers/Algorithms/CudaReductionKernel.h @@ -74,22 +74,19 @@ CudaReductionKernel( const Result zero, * Read data into the shared memory. We start with the * sequential reduction. 
*/ - while( gid + 4 * gridSize < size ) - { + while( gid + 4 * gridSize < size ) { reduction( sdata[ tid ], dataFetcher( gid ) ); reduction( sdata[ tid ], dataFetcher( gid + gridSize ) ); reduction( sdata[ tid ], dataFetcher( gid + 2 * gridSize ) ); reduction( sdata[ tid ], dataFetcher( gid + 3 * gridSize ) ); gid += 4 * gridSize; } - while( gid + 2 * gridSize < size ) - { + while( gid + 2 * gridSize < size ) { reduction( sdata[ tid ], dataFetcher( gid ) ); reduction( sdata[ tid ], dataFetcher( gid + gridSize ) ); gid += 2 * gridSize; } - while( gid < size ) - { + while( gid < size ) { reduction( sdata[ tid ], dataFetcher( gid ) ); gid += gridSize; } @@ -99,71 +96,51 @@ CudaReductionKernel( const Result zero, /*** * Perform the parallel reduction. */ - if( blockSize >= 1024 ) - { + if( blockSize >= 1024 ) { if( tid < 512 ) reduction( sdata[ tid ], sdata[ tid + 512 ] ); __syncthreads(); } - if( blockSize >= 512 ) - { + if( blockSize >= 512 ) { if( tid < 256 ) reduction( sdata[ tid ], sdata[ tid + 256 ] ); __syncthreads(); } - if( blockSize >= 256 ) - { + if( blockSize >= 256 ) { if( tid < 128 ) reduction( sdata[ tid ], sdata[ tid + 128 ] ); __syncthreads(); - //printf( "2: tid %d data %f \n", tid, sdata[ tid ] ); } - - if( blockSize >= 128 ) - { + if( blockSize >= 128 ) { if( tid < 64 ) reduction( sdata[ tid ], sdata[ tid + 64 ] ); __syncthreads(); - //printf( "3: tid %d data %f \n", tid, sdata[ tid ] ); } /*** * This runs in one warp so it is synchronized implicitly. */ - if( tid < 32 ) - { + if( tid < 32 ) { volatile ResultType* vsdata = sdata; - if( blockSize >= 64 ) - { + if( blockSize >= 64 ) { volatileReduction( vsdata[ tid ], vsdata[ tid + 32 ] ); - //printf( "4: tid %d data %f \n", tid, sdata[ tid ] ); } // TODO: If blocksize == 32, the following does not work // We do not check if tid < 16. Fix it!!! 
- if( blockSize >= 32 ) - { + if( blockSize >= 32 ) { volatileReduction( vsdata[ tid ], vsdata[ tid + 16 ] ); - //printf( "5: tid %d data %f \n", tid, sdata[ tid ] ); } - if( blockSize >= 16 ) - { + if( blockSize >= 16 ) { volatileReduction( vsdata[ tid ], vsdata[ tid + 8 ] ); - //printf( "6: tid %d data %f \n", tid, sdata[ tid ] ); } - if( blockSize >= 8 ) - { + if( blockSize >= 8 ) { volatileReduction( vsdata[ tid ], vsdata[ tid + 4 ] ); - //printf( "7: tid %d data %f \n", tid, sdata[ tid ] ); } - if( blockSize >= 4 ) - { + if( blockSize >= 4 ) { volatileReduction( vsdata[ tid ], vsdata[ tid + 2 ] ); - //printf( "8: tid %d data %f \n", tid, sdata[ tid ] ); } - if( blockSize >= 2 ) - { + if( blockSize >= 2 ) { volatileReduction( vsdata[ tid ], vsdata[ tid + 1 ] ); - //printf( "9: tid %d data %f \n", tid, sdata[ tid ] ); } } @@ -171,13 +148,152 @@ CudaReductionKernel( const Result zero, * Store the result back in the global memory. */ if( tid == 0 ) - { - //printf( "Block %d result = %f \n", blockIdx.x, sdata[ 0 ] ); output[ blockIdx.x ] = sdata[ 0 ]; +} + +template< int blockSize, + typename Result, + typename DataFetcher, + typename Reduction, + typename VolatileReduction, + typename Index > +__global__ void +__launch_bounds__( Reduction_maxThreadsPerBlock, Reduction_minBlocksPerMultiprocessor ) +CudaReductionWithArgumentKernel( const Result zero, + const DataFetcher dataFetcher, + const Reduction reduction, + const VolatileReduction volatileReduction, + const Index size, + Result* output, + Index* idxOutput, + const Index* idxInput = nullptr ) +{ + using IndexType = Index; + using ResultType = Result; + + ResultType* sdata = Devices::Cuda::getSharedMemory< ResultType >(); + IndexType* sidx = static_cast< IndexType* >( static_cast< void* >( &sdata[ blockDim.x ] ) ); + + /*** + * Get thread id (tid) and global thread id (gid). + * gridSize is the number of element processed by all blocks at the + * same time. 
+ */ + const IndexType tid = threadIdx.x; + IndexType gid = blockIdx.x * blockDim. x + threadIdx.x; + const IndexType gridSize = blockDim.x * gridDim.x; + + /*** + * Read data into the shared memory. We start with the + * sequential reduction. + */ + if( idxInput ) { + sdata[ tid ] = dataFetcher( gid ); + sidx[ tid ] = idxInput[ gid ]; + gid += gridSize; + while( gid + 4 * gridSize < size ) { + reduction( sidx[ tid ], idxInput[ gid ], sdata[ tid ], dataFetcher( gid ) ); + reduction( sidx[ tid ], idxInput[ gid + gridSize ], sdata[ tid ], dataFetcher( gid + gridSize ) ); + reduction( sidx[ tid ], idxInput[ gid + 2 * gridSize ], sdata[ tid ], dataFetcher( gid + 2 * gridSize ) ); + reduction( sidx[ tid ], idxInput[ gid + 3 * gridSize ], sdata[ tid ], dataFetcher( gid + 3 * gridSize ) ); + gid += 4 * gridSize; + } + while( gid + 2 * gridSize < size ) { + reduction( sidx[ tid ], idxInput[ gid ], sdata[ tid ], dataFetcher( gid ) ); + reduction( sidx[ tid ], idxInput[ gid + gridSize ], sdata[ tid ], dataFetcher( gid + gridSize ) ); + gid += 2 * gridSize; + } + while( gid < size ) { + reduction( sidx[ tid ], idxInput[ gid ], sdata[ tid ], dataFetcher( gid ) ); + gid += gridSize; + } + } + else { + sdata[ tid ] = dataFetcher( gid ); + sidx[ tid ] = gid; + gid += gridSize; + while( gid + 4 * gridSize < size ) { + reduction( sidx[ tid ], gid, sdata[ tid ], dataFetcher( gid ) ); + reduction( sidx[ tid ], gid + gridSize, sdata[ tid ], dataFetcher( gid + gridSize ) ); + reduction( sidx[ tid ], gid + 2 * gridSize, sdata[ tid ], dataFetcher( gid + 2 * gridSize ) ); + reduction( sidx[ tid ], gid + 3 * gridSize, sdata[ tid ], dataFetcher( gid + 3 * gridSize ) ); + gid += 4 * gridSize; + } + while( gid + 2 * gridSize < size ) { + reduction( sidx[ tid ], gid, sdata[ tid ], dataFetcher( gid ) ); + reduction( sidx[ tid ], gid + gridSize, sdata[ tid ], dataFetcher( gid + gridSize ) ); + gid += 2 * gridSize; + } + while( gid < size ) { + reduction( sidx[ tid ], gid, sdata[ tid ], 
dataFetcher( gid ) ); + gid += gridSize; + } + } + __syncthreads(); + + //printf( "1: tid %d data %f \n", tid, sdata[ tid ] ); + /*** + * Perform the parallel reduction. + */ + if( blockSize >= 1024 ) { + if( tid < 512 ) + reduction( sidx[ tid ], sidx[ tid + 512 ], sdata[ tid ], sdata[ tid + 512 ] ); + __syncthreads(); + } + if( blockSize >= 512 ) { + if( tid < 256 ) + reduction( sidx[ tid ], sidx[ tid + 256 ], sdata[ tid ], sdata[ tid + 256 ] ); + __syncthreads(); + } + if( blockSize >= 256 ) { + if( tid < 128 ) + reduction( sidx[ tid ], sidx[ tid + 128 ], sdata[ tid ], sdata[ tid + 128 ] ); + __syncthreads(); + } + if( blockSize >= 128 ) { + if( tid < 64 ) + reduction( sidx[ tid ], sidx[ tid + 64 ], sdata[ tid ], sdata[ tid + 64 ] ); + __syncthreads(); + } + + /*** + * This runs in one warp so it is synchronized implicitly. + */ + if( tid < 32 ) { + volatile ResultType* vsdata = sdata; + volatile IndexType* vsidx = sidx; + if( blockSize >= 64 ) { + volatileReduction( vsidx[ tid ], vsidx[ tid + 32 ], vsdata[ tid ], vsdata[ tid + 32 ] ); + } + // TODO: If blocksize == 32, the following does not work + // We do not check if tid < 16. Fix it!!! + if( blockSize >= 32 ) { + volatileReduction( vsidx[ tid ], vsidx[ tid + 16 ], vsdata[ tid ], vsdata[ tid + 16 ] ); + } + if( blockSize >= 16 ) { + volatileReduction( vsidx[ tid ], vsidx[ tid + 8 ], vsdata[ tid ], vsdata[ tid + 8 ] ); + } + if( blockSize >= 8 ) { + volatileReduction( vsidx[ tid ], vsidx[ tid + 4 ], vsdata[ tid ], vsdata[ tid + 4 ] ); + } + if( blockSize >= 4 ) { + volatileReduction( vsidx[ tid ], vsidx[ tid + 2 ], vsdata[ tid ], vsdata[ tid + 2 ] ); + } + if( blockSize >= 2 ) { + volatileReduction( vsidx[ tid ], vsidx[ tid + 1 ], vsdata[ tid ], vsdata[ tid + 1 ] ); + } } + /*** + * Store the result back in the global memory. 
+ */ + if( tid == 0 ) { + output[ blockIdx.x ] = sdata[ 0 ]; + idxOutput[ blockIdx.x ] = sidx[ 0 ]; + } } + template< typename Index, typename Result > struct CudaReductionKernelLauncher @@ -229,6 +345,28 @@ struct CudaReductionKernelLauncher return this->reducedSize; } + template< typename DataFetcher, + typename Reduction, + typename VolatileReduction > + int startWithArgument( const Reduction& reduction, + const VolatileReduction& volatileReduction, + const DataFetcher& dataFetcher, + const Result& zero, + ResultType*& output, + IndexType*& idxOutput ) + { + //// + // create reference to the reduction buffer singleton and set size + const size_t buf_size = 2 * desGridSize * ( sizeof( ResultType ) + sizeof( IndexType ) ); + CudaReductionBuffer& cudaReductionBuffer = CudaReductionBuffer::getInstance(); + cudaReductionBuffer.setSize( buf_size ); + output = cudaReductionBuffer.template getData< ResultType >(); + idxOutput = static_cast< IndexType* >( static_cast< void* >( &output[ 2 * desGridSize ] ) ); + + this-> reducedSize = this->launchWithArgument( originalSize, reduction, volatileReduction, dataFetcher, zero, output, idxOutput, nullptr ); + return this->reducedSize; + } + template< typename Reduction, typename VolatileReduction > Result finish( const Reduction& reduction, @@ -256,6 +394,38 @@ struct CudaReductionKernelLauncher return result; } + template< typename Reduction, + typename VolatileReduction > + Result finishWithArgument( IndexType& argument, + const Reduction& reduction, + const VolatileReduction& volatileReduction, + const Result& zero ) + { + //// + // Input is the first half of the buffer, output is the second half + //const size_t buf_size = desGridSize * sizeof( ResultType ); + CudaReductionBuffer& cudaReductionBuffer = CudaReductionBuffer::getInstance(); + ResultType* input = cudaReductionBuffer.template getData< ResultType >(); + ResultType* output = &input[ desGridSize ]; + IndexType* idxInput = static_cast< IndexType* >( &output[ 
desGridSize ] ); + IndexType* idxOutput = &idxInput[ desGridSize ]; + + auto copyFetch = [=] __cuda_callable__ ( IndexType i ) { return input[ i ]; }; + while( this->reducedSize > 1 ) + { + this-> reducedSize = this->launchWithArgument( this->reducedSize, reduction, volatileReduction, copyFetch, zero, output, idxOutput, idxInput ); + std::swap( input, output ); + } + + //// + // Copy result on CPU + ResultType result; + ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( &result, output, 1 ); + ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( &argument, idxOutput, 1 ); + return result; + } + + protected: template< typename DataFetcher, typename Reduction, @@ -278,75 +448,167 @@ struct CudaReductionKernelLauncher ? 2 * blockSize.x * sizeof( ResultType ) : blockSize.x * sizeof( ResultType ); - /*** - * Depending on the blockSize we generate appropriate template instance. - */ - switch( blockSize.x ) - { - case 512: - CudaReductionKernel< 512 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output); - break; - case 256: - cudaFuncSetCacheConfig(CudaReductionKernel< 256, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); - - CudaReductionKernel< 256 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output); - break; - case 128: - cudaFuncSetCacheConfig(CudaReductionKernel< 128, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); - - CudaReductionKernel< 128 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output); - break; - case 64: - cudaFuncSetCacheConfig(CudaReductionKernel< 64, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); - - CudaReductionKernel< 64 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output); - break; - case 32: - 
cudaFuncSetCacheConfig(CudaReductionKernel< 32, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); - - CudaReductionKernel< 32 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output); - break; - case 16: - cudaFuncSetCacheConfig(CudaReductionKernel< 16, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); - - CudaReductionKernel< 16 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output); - break; - case 8: - cudaFuncSetCacheConfig(CudaReductionKernel< 8, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); - - CudaReductionKernel< 8 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output); - break; - case 4: - cudaFuncSetCacheConfig(CudaReductionKernel< 4, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); - - CudaReductionKernel< 4 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output); - break; - case 2: - cudaFuncSetCacheConfig(CudaReductionKernel< 2, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); - - CudaReductionKernel< 2 > - <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output); - break; - case 1: - throw std::logic_error( "blockSize should not be 1." ); - default: - throw std::logic_error( "Block size is " + std::to_string(blockSize.x) + " which is none of 1, 2, 4, 8, 16, 32, 64, 128, 256 or 512." ); - } - TNL_CHECK_CUDA_DEVICE; + ///// + // Depending on the blockSize we generate appropriate template instance. 
+ switch( blockSize.x ) + { + case 512: + CudaReductionKernel< 512 > + <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output); + break; + case 256: + cudaFuncSetCacheConfig(CudaReductionKernel< 256, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); + + CudaReductionKernel< 256 > + <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output); + break; + case 128: + cudaFuncSetCacheConfig(CudaReductionKernel< 128, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); + + CudaReductionKernel< 128 > + <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output); + break; + case 64: + cudaFuncSetCacheConfig(CudaReductionKernel< 64, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); + + CudaReductionKernel< 64 > + <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output); + break; + case 32: + cudaFuncSetCacheConfig(CudaReductionKernel< 32, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); + + CudaReductionKernel< 32 > + <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output); + break; + case 16: + cudaFuncSetCacheConfig(CudaReductionKernel< 16, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); + + CudaReductionKernel< 16 > + <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output); + break; + case 8: + cudaFuncSetCacheConfig(CudaReductionKernel< 8, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); + + CudaReductionKernel< 8 > + <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output); + break; + case 4: + cudaFuncSetCacheConfig(CudaReductionKernel< 4, Result, 
DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); + + CudaReductionKernel< 4 > + <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output); + break; + case 2: + cudaFuncSetCacheConfig(CudaReductionKernel< 2, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); + + CudaReductionKernel< 2 > + <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output); + break; + case 1: + TNL_ASSERT( false, std::cerr << "blockSize should not be 1." << std::endl ); + default: + TNL_ASSERT( false, std::cerr << "Block size is " << blockSize. x << " which is none of 1, 2, 4, 8, 16, 32, 64, 128, 256 or 512." ); + } + TNL_CHECK_CUDA_DEVICE; + + //// + // Return the size of the output array on the CUDA device + return gridSize.x; + } + + template< typename DataFetcher, + typename Reduction, + typename VolatileReduction > + int launchWithArgument( const Index size, + const Reduction& reduction, + const VolatileReduction& volatileReduction, + const DataFetcher& dataFetcher, + const Result& zero, + Result* output, + Index* idxOutput, + const Index* idxInput ) + { + dim3 blockSize, gridSize; + blockSize.x = Reduction_maxThreadsPerBlock; + gridSize.x = TNL::min( Devices::Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize ); //// - // return the size of the output array on the CUDA device - return gridSize.x; + // when there is only one warp per blockSize.x, we need to allocate two warps + // worth of shared memory so that we don't index shared memory out of bounds + const IndexType shmem = (blockSize.x <= 32) + ? 2 * blockSize.x * ( sizeof( ResultType ) + sizeof( Index ) ) + : blockSize.x * ( sizeof( ResultType ) + sizeof( Index ) ); + + /*** + * Depending on the blockSize we generate appropriate template instance. 
+ */ + switch( blockSize.x ) + { + case 512: + CudaReductionWithArgumentKernel< 512 > + <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output, idxOutput, idxInput ); + break; + case 256: + cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< 256, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); + + CudaReductionWithArgumentKernel< 256 > + <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output, idxOutput, idxInput ); + break; + case 128: + cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< 128, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); + + CudaReductionWithArgumentKernel< 128 > + <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output, idxOutput, idxInput ); + break; + case 64: + cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< 64, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); + + CudaReductionWithArgumentKernel< 64 > + <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output, idxOutput, idxInput ); + break; + case 32: + cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< 32, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); + + CudaReductionWithArgumentKernel< 32 > + <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output, idxOutput, idxInput ); + break; + case 16: + cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< 16, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); + + CudaReductionWithArgumentKernel< 16 > + <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output, idxOutput, idxInput ); + break; + case 8: + cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< 8, Result, DataFetcher, 
Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); + + CudaReductionWithArgumentKernel< 8 > + <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output, idxOutput, idxInput ); + break; + case 4: + cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< 4, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); + + CudaReductionWithArgumentKernel< 4 > + <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output, idxOutput, idxInput ); + break; + case 2: + cudaFuncSetCacheConfig(CudaReductionWithArgumentKernel< 2, Result, DataFetcher, Reduction, VolatileReduction, Index >, cudaFuncCachePreferShared); + + CudaReductionWithArgumentKernel< 2 > + <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, volatileReduction, size, output, idxOutput, idxInput ); + break; + case 1: + TNL_ASSERT( false, std::cerr << "blockSize should not be 1." << std::endl ); + default: + TNL_ASSERT( false, std::cerr << "Block size is " << blockSize. x << " which is none of 1, 2, 4, 8, 16, 32, 64, 128, 256 or 512." 
); + } + TNL_CHECK_CUDA_DEVICE; + + //// + // return the size of the output array on the CUDA device + return gridSize.x; } + const int activeDevice; const int blocksdPerMultiprocessor; const int desGridSize; diff --git a/src/TNL/Containers/Algorithms/Reduction.h b/src/TNL/Containers/Algorithms/Reduction.h index 7ac0d4c02..8c7af3da9 100644 --- a/src/TNL/Containers/Algorithms/Reduction.h +++ b/src/TNL/Containers/Algorithms/Reduction.h @@ -28,52 +28,92 @@ class Reduction template<> class Reduction< Devices::Cuda > { -public: - template< typename Index, - typename Result, - typename ReductionOperation, - typename VolatileReductionOperation, - typename DataFetcher > - static Result - reduce( const Index size, - ReductionOperation& reduction, - VolatileReductionOperation& volatileReduction, - DataFetcher& dataFetcher, - const Result& zero ); + public: + template< typename Index, + typename Result, + typename ReductionOperation, + typename VolatileReductionOperation, + typename DataFetcher > + static Result + reduce( const Index size, + ReductionOperation& reduction, + VolatileReductionOperation& volatileReduction, + DataFetcher& dataFetcher, + const Result& zero ); + + template< typename Index, + typename Result, + typename ReductionOperation, + typename VolatileReductionOperation, + typename DataFetcher > + static Result + reduceWithArgument( const Index size, + Index& argument, + ReductionOperation& reduction, + VolatileReductionOperation& volatileReduction, + DataFetcher& dataFetcher, + const Result& zero ); }; template<> class Reduction< Devices::Host > { -public: - template< typename Index, - typename Result, - typename ReductionOperation, - typename VolatileReductionOperation, - typename DataFetcher > - static Result - reduce( const Index size, - ReductionOperation& reduction, - VolatileReductionOperation& volatileReduction, - DataFetcher& dataFetcher, - const Result& zero ); + public: + template< typename Index, + typename Result, + typename ReductionOperation, + 
typename VolatileReductionOperation, + typename DataFetcher > + static Result + reduce( const Index size, + ReductionOperation& reduction, + VolatileReductionOperation& volatileReduction, + DataFetcher& dataFetcher, + const Result& zero ); + + template< typename Index, + typename Result, + typename ReductionOperation, + typename VolatileReductionOperation, + typename DataFetcher > + static Result + reduceWithArgument( const Index size, + Index& argument, + ReductionOperation& reduction, + VolatileReductionOperation& volatileReduction, + DataFetcher& dataFetcher, + const Result& zero ); }; template<> class Reduction< Devices::MIC > { -public: - template< typename Index, - typename Result, - typename ReductionOperation, - typename VolatileReductionOperation, - typename DataFetcher > - static Result - reduce( const Index size, - ReductionOperation& reduction, - VolatileReductionOperation& volatileReduction, - DataFetcher& dataFetcher, - const Result& zero ); + public: + template< typename Index, + typename Result, + typename ReductionOperation, + typename VolatileReductionOperation, + typename DataFetcher > + static Result + reduce( const Index size, + ReductionOperation& reduction, + VolatileReductionOperation& volatileReduction, + DataFetcher& dataFetcher, + const Result& zero ); + + template< typename Index, + typename Result, + typename ReductionOperation, + typename VolatileReductionOperation, + typename DataFetcher > + static Result + reduceWithArgument( const Index size, + Index& argument, + ReductionOperation& reduction, + VolatileReductionOperation& volatileReduction, + DataFetcher& dataFetcher, + const Result& zero ); + }; } // namespace Algorithms diff --git a/src/TNL/Containers/Algorithms/Reduction.hpp b/src/TNL/Containers/Algorithms/Reduction.hpp index 31c93504c..7647ff94b 100644 --- a/src/TNL/Containers/Algorithms/Reduction.hpp +++ b/src/TNL/Containers/Algorithms/Reduction.hpp @@ -65,26 +65,6 @@ Reduction< Devices::Cuda >:: //constexpr bool 
can_reduce_all_on_host = std::is_fundamental< DataType1 >::value || std::is_fundamental< DataType2 >::value || std::is_pointer< DataType1 >::value || std::is_pointer< DataType2 >::value; constexpr bool can_reduce_later_on_host = std::is_fundamental< ResultType >::value || std::is_pointer< ResultType >::value; - /*** - * First check if the input array(s) is/are large enough for the reduction on GPU. - * Otherwise copy it/them to host and reduce on CPU. - */ - // With lambda function we cannot reduce data on host - we do not know the data here. - /*if( can_reduce_all_on_host && size <= Reduction_minGpuDataSize ) - { - typename std::remove_const< DataType1 >::type hostArray1[ Reduction_minGpuDataSize ]; - ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray1, deviceInput1, size ); - if( deviceInput2 ) { - using _DT2 = typename std::conditional< std::is_same< DataType2, void >::value, DataType1, DataType2 >::type; - typename std::remove_const< _DT2 >::type hostArray2[ Reduction_minGpuDataSize ]; - ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray2, (_DT2*) deviceInput2, size ); - return Reduction< Devices::Host >::reduce( zero, dataFetcher, reduction, size, hostArray1, hostArray2 ); - } - else { - return Reduction< Devices::Host >::reduce( operation, size, hostArray1, (DataType2*) nullptr ); - } - }*/ - #ifdef CUDA_REDUCTION_PROFILING Timer timer; timer.reset(); @@ -149,22 +129,115 @@ Reduction< Devices::Cuda >:: timer.start(); #endif - //ResultType resultArray[ 1 ]; - //ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray, deviceAux1, reducedSize ); - //const ResultType result = resultArray[ 0 ]; + return result; + } +#else + throw Exceptions::CudaSupportMissing(); +#endif +}; + +template< typename Index, + typename Result, + typename ReductionOperation, + typename VolatileReductionOperation, + typename DataFetcher > +Result +Reduction< Devices::Cuda >:: +reduceWithArgument( const Index size, + Index& 
argument, + ReductionOperation& reduction, + VolatileReductionOperation& volatileReduction, + DataFetcher& dataFetcher, + const Result& zero ) +{ + #ifdef HAVE_CUDA + + using IndexType = Index; + using ResultType = Result; + + /*** + * Only fundamental and pointer types can be safely reduced on host. Complex + * objects stored on the device might contain pointers into the device memory, + * in which case reduction on host might fail. + */ + //constexpr bool can_reduce_all_on_host = std::is_fundamental< DataType1 >::value || std::is_fundamental< DataType2 >::value || std::is_pointer< DataType1 >::value || std::is_pointer< DataType2 >::value; + constexpr bool can_reduce_later_on_host = std::is_fundamental< ResultType >::value || std::is_pointer< ResultType >::value; + + #ifdef CUDA_REDUCTION_PROFILING + Timer timer; + timer.reset(); + timer.start(); + #endif + + CudaReductionKernelLauncher< IndexType, ResultType > reductionLauncher( size ); + + /**** + * Reduce the data on the CUDA device. + */ + ResultType* deviceAux1( nullptr ); + IndexType* deviceIndexes( nullptr ); + IndexType reducedSize = reductionLauncher.startWithArgument( + reduction, + volatileReduction, + dataFetcher, + zero, + deviceAux1, + deviceIndexes ); + #ifdef CUDA_REDUCTION_PROFILING + timer.stop(); + std::cout << " Reduction on GPU to size " << reducedSize << " took " << timer.getRealTime() << " sec. " << std::endl; + timer.reset(); + timer.start(); + #endif + + if( can_reduce_later_on_host ) { + /*** + * Transfer the reduced data from device to host. + */ + std::unique_ptr< ResultType[] > resultArray{ new ResultType[ reducedSize ] }; + ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray.get(), deviceAux1, reducedSize ); + + #ifdef CUDA_REDUCTION_PROFILING + timer.stop(); + std::cout << " Transferring data to CPU took " << timer.getRealTime() << " sec. " << std::endl; + timer.reset(); + timer.start(); + #endif + + /*** + * Reduce the data on the host system. 
+ */ + auto fetch = [&] ( IndexType i ) { return resultArray[ i ]; }; + const ResultType result = Reduction< Devices::Host >::reduceWithArgument( reducedSize, reduction, volatileReduction, fetch, zero ); + + #ifdef CUDA_REDUCTION_PROFILING + timer.stop(); + std::cout << " Reduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl; + #endif + return result; + } + else { + /*** + * Data can't be safely reduced on host, so continue with the reduction on the CUDA device. + */ + auto result = reductionLauncher.finishWithArgument( argument, reduction, volatileReduction, zero ); - /*#ifdef CUDA_REDUCTION_PROFILING + #ifdef CUDA_REDUCTION_PROFILING timer.stop(); - std::cout << " Transferring the result to CPU took " << timer.getRealTime() << " sec. " << std::endl; - #endif*/ + std::cout << " Reduction of small data set on GPU took " << timer.getRealTime() << " sec. " << std::endl; + timer.reset(); + timer.start(); + #endif return result; } #else throw Exceptions::CudaSupportMissing(); #endif -}; +} +//// +// Reduction on host template< typename Index, typename Result, typename ReductionOperation, @@ -263,6 +336,139 @@ Reduction< Devices::Host >:: #endif } +template< typename Index, + typename Result, + typename ReductionOperation, + typename VolatileReductionOperation, + typename DataFetcher > +Result +Reduction< Devices::Host >:: +reduceWithArgument( const Index size, + Index& argument, + ReductionOperation& reduction, + VolatileReductionOperation& volatileReduction, + DataFetcher& dataFetcher, + const Result& zero ) +{ + using IndexType = Index; + using ResultType = Result; + + constexpr int block_size = 128; + const int blocks = size / block_size; + +#ifdef HAVE_OPENMP + if( TNL::Devices::Host::isOMPEnabled() && size >= 2 * block_size ) { + // global result variable + ResultType result = zero; + argument = -1; +#pragma omp parallel + { + // initialize array for thread-local results + ResultType r[ 4 ] = { zero, zero, zero, zero }; + 
IndexType arg[ 4 ] = { 0, 0, 0, 0 }; + bool initialised( false ); + + #pragma omp for nowait + for( int b = 0; b < blocks; b++ ) { + const IndexType offset = b * block_size; + for( int i = 0; i < block_size; i += 4 ) { + if( ! initialised ) { + arg[ 0 ] = offset + i; + arg[ 1 ] = offset + i + 1; + arg[ 2 ] = offset + i + 2; + arg[ 3 ] = offset + i + 3; + r[ 0 ] = dataFetcher( offset + i ); + r[ 1 ] = dataFetcher( offset + i + 1 ); + r[ 2 ] = dataFetcher( offset + i + 2 ); + r[ 3 ] = dataFetcher( offset + i + 3 ); + initialised = true; + continue; + } + reduction( arg[ 0 ], offset + i, r[ 0 ], dataFetcher( offset + i ) ); + reduction( arg[ 1 ], offset + i + 1, r[ 1 ], dataFetcher( offset + i + 1 ) ); + reduction( arg[ 2 ], offset + i + 2, r[ 2 ], dataFetcher( offset + i + 2 ) ); + reduction( arg[ 3 ], offset + i + 3, r[ 3 ], dataFetcher( offset + i + 3 ) ); + } + } + + // the first thread that reaches here processes the last, incomplete block + #pragma omp single nowait + { + for( IndexType i = blocks * block_size; i < size; i++ ) + reduction( arg[ 0 ], i, r[ 0 ], dataFetcher( i ) ); + } + + // local reduction of unrolled results + reduction( arg[ 0 ], arg[ 2 ], r[ 0 ], r[ 2 ] ); + reduction( arg[ 1 ], arg[ 3 ], r[ 1 ], r[ 3 ] ); + reduction( arg[ 0 ], arg[ 1 ], r[ 0 ], r[ 1 ] ); + + // inter-thread reduction of local results + #pragma omp critical + { + if( argument == - 1 ) + argument = arg[ 0 ]; + reduction( argument, arg[ 0 ], result, r[ 0 ] ); + } + } + return result; + } + else { +#endif + if( blocks > 1 ) { + // initialize array for unrolled results + ResultType r[ 4 ] = { zero, zero, zero, zero }; + IndexType arg[ 4 ] = { 0, 0, 0, 0 }; + bool initialised( false ); + + // main reduction (explicitly unrolled loop) + for( int b = 0; b < blocks; b++ ) { + const IndexType offset = b * block_size; + for( int i = 0; i < block_size; i += 4 ) { + if( ! 
initialised ) + { + arg[ 0 ] = offset + i; + arg[ 1 ] = offset + i + 1; + arg[ 2 ] = offset + i + 2; + arg[ 3 ] = offset + i + 3; + r[ 0 ] = dataFetcher( offset + i ); + r[ 1 ] = dataFetcher( offset + i + 1 ); + r[ 2 ] = dataFetcher( offset + i + 2 ); + r[ 3 ] = dataFetcher( offset + i + 3 ); + initialised = true; + continue; + } + reduction( arg[ 0 ], offset + i, r[ 0 ], dataFetcher( offset + i ) ); + reduction( arg[ 1 ], offset + i + 1, r[ 1 ], dataFetcher( offset + i + 1 ) ); + reduction( arg[ 2 ], offset + i + 2, r[ 2 ], dataFetcher( offset + i + 2 ) ); + reduction( arg[ 3 ], offset + i + 3, r[ 3 ], dataFetcher( offset + i + 3 ) ); + } + } + + // reduction of the last, incomplete block (not unrolled) + for( IndexType i = blocks * block_size; i < size; i++ ) + reduction( arg[ 0 ], i, r[ 0 ], dataFetcher( i ) ); + + // reduction of unrolled results + reduction( arg[ 0 ], arg[ 2 ], r[ 0 ], r[ 2 ] ); + reduction( arg[ 1 ], arg[ 3 ], r[ 1 ], r[ 3 ] ); + reduction( arg[ 0 ], arg[ 1 ], r[ 0 ], r[ 1 ] ); + argument = arg[ 0 ]; + return r[ 0 ]; + } + else { + ResultType result = dataFetcher( 0 ); + argument = 0; + for( IndexType i = 1; i < size; i++ ) + reduction( argument, i, result, dataFetcher( i ) ); + return result; + } +#ifdef HAVE_OPENMP + } +#endif +} + + } // namespace Algorithms } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Expressions/VerticalOperations.h b/src/TNL/Containers/Expressions/VerticalOperations.h index e7549e90a..94c9781ba 100644 --- a/src/TNL/Containers/Expressions/VerticalOperations.h +++ b/src/TNL/Containers/Expressions/VerticalOperations.h @@ -31,6 +31,23 @@ auto StaticExpressionMin( const Expression& expression ) -> typename std::remove return aux; } +template< typename Expression, typename Real > +__cuda_callable__ +auto StaticExpressionArgMin( const Expression& expression, int& arg ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type +{ + auto value = expression[ 0 ]; + arg = 0; + for( int 
i = 1; i < expression.getSize(); i++ ) + { + if( expression[ i ] < value ) + { + value = expression[ i ]; + arg = i; + } + } + return value; +} + template< typename Expression > __cuda_callable__ auto StaticExpressionMax( const Expression& expression ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type @@ -41,6 +58,23 @@ auto StaticExpressionMax( const Expression& expression ) -> typename std::remove return aux; } +template< typename Expression, typename Real > +__cuda_callable__ +auto StaticExpressionArgMax( const Expression& expression, int& arg ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type +{ + auto value = expression[ 0 ]; + arg = 0; + for( int i = 1; i < expression.getSize(); i++ ) + { + if( expression[ i ] > value ) + { + value = expression[ i ]; + arg = i; + } + } + return value; +} + template< typename Expression > __cuda_callable__ auto StaticExpressionSum( const Expression& expression ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type @@ -140,6 +174,28 @@ auto ExpressionMin( const Expression& expression ) -> typename std::remove_refer return Algorithms::Reduction< typename Expression::DeviceType >::reduce( expression.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() ); } +template< typename Expression > +auto ExpressionArgMin( const Expression& expression, typename Expression::IndexType& arg ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type +{ + using ResultType = typename std::remove_cv< typename std::remove_reference< decltype( expression[ 0 ] ) >::type >::type; + using IndexType = typename Expression::IndexType; + + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return expression[ i ]; }; + auto reduction = [=] __cuda_callable__ ( IndexType& aIdx, const IndexType& bIdx, ResultType& a, const ResultType& b ) { + if( a < b ) { + a = b; + aIdx = bIdx; + } + }; + auto volatileReduction = [=] __cuda_callable__ ( volatile 
IndexType& aIdx, volatile IndexType& bIdx, volatile ResultType& a, volatile ResultType& b ) { + if( a < b ) { + a = b; + aIdx = bIdx; + } + }; + return Algorithms::Reduction< typename Expression::DeviceType >::reduceWithArgument( expression.getSize(), arg, reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() ); +} + template< typename Expression > auto ExpressionMax( const Expression& expression ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type { @@ -152,6 +208,32 @@ auto ExpressionMax( const Expression& expression ) -> typename std::remove_refer return Algorithms::Reduction< typename Expression::DeviceType >::reduce( expression.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::min() ); } +template< typename Expression > +auto ExpressionArgMax( const Expression& expression, typename Expression::IndexType& arg ) -> typename std::remove_reference< decltype( expression[ 0 ] ) >::type +{ + using ResultType = typename std::remove_cv< typename std::remove_reference< decltype( expression[ 0 ] ) >::type >::type; + using IndexType = typename Expression::IndexType; + + auto fetch = [=] __cuda_callable__ ( IndexType i ) { return expression[ i ]; }; + auto reduction = [=] __cuda_callable__ ( IndexType& aIdx, const IndexType& bIdx, ResultType& a, const ResultType& b ) { + if( a > b ) { + a = b; + aIdx = bIdx; + } + else if( a == b && bIdx < aIdx ) + aIdx = bIdx; + }; + auto volatileReduction = [=] __cuda_callable__ ( volatile IndexType& aIdx, volatile IndexType& bIdx, volatile ResultType& a, volatile ResultType& b ) { + if( a > b ) { + a = b; + aIdx = bIdx; + } + else if( a == b && bIdx < aIdx ) + aIdx = bIdx; + }; + return Algorithms::Reduction< typename Expression::DeviceType >::reduceWithArgument( expression.getSize(), arg, reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::min() ); +} + template< typename Expression > auto ExpressionSum( const Expression& expression ) -> 
typename std::remove_reference< decltype( expression[ 0 ] ) >::type { diff --git a/src/TNL/Containers/StaticVectorExpressions.h b/src/TNL/Containers/StaticVectorExpressions.h index 9b6b78d64..e5c5a4900 100644 --- a/src/TNL/Containers/StaticVectorExpressions.h +++ b/src/TNL/Containers/StaticVectorExpressions.h @@ -524,6 +524,14 @@ min( const Containers::StaticVector< Size, Real >& a ) return Containers::Expressions::StaticExpressionMin( a ); } +template< int Size, typename Real > +__cuda_callable__ +typename Containers::StaticVector< Size, Real >::RealType +argMin( const Containers::StaticVector< Size, Real >& a, int& arg ) +{ + return Containers::Expressions::StaticExpressionArgMin( a, arg ); +} + template< int Size, typename Real > __cuda_callable__ typename Containers::StaticVector< Size, Real >::RealType @@ -532,6 +540,14 @@ max( const Containers::StaticVector< Size, Real >& a ) return Containers::Expressions::StaticExpressionMax( a ); } +template< int Size, typename Real > +__cuda_callable__ +typename Containers::StaticVector< Size, Real >::RealType +argMax( const Containers::StaticVector< Size, Real >& a, int& arg ) +{ + return Containers::Expressions::StaticExpressionArgMax( a, arg ); +} + template< int Size, typename Real > __cuda_callable__ typename Containers::StaticVector< Size, Real >::RealType diff --git a/src/TNL/Containers/VectorViewExpressions.h b/src/TNL/Containers/VectorViewExpressions.h index b57089fd7..e1aaeaac6 100644 --- a/src/TNL/Containers/VectorViewExpressions.h +++ b/src/TNL/Containers/VectorViewExpressions.h @@ -470,6 +470,15 @@ min( const Containers::VectorView< Real, Device, Index >& a ) return Containers::Expressions::ExpressionMin( a ); } +template< typename Real, + typename Device, + typename Index > +typename Containers::VectorView< Real, Device, Index >::RealType +argMin( const Containers::VectorView< Real, Device, Index >& a, Index& arg ) +{ + return Containers::Expressions::ExpressionArgMin( a, arg ); +} + template< typename Real, 
typename Device, typename Index > @@ -479,6 +488,15 @@ max( const Containers::VectorView< Real, Device, Index >& a ) return Containers::Expressions::ExpressionMax( a ); } +template< typename Real, + typename Device, + typename Index > +typename Containers::VectorView< Real, Device, Index >::RealType +argMax( const Containers::VectorView< Real, Device, Index >& a, Index& arg ) +{ + return Containers::Expressions::ExpressionArgMax( a, arg ); +} + template< typename Real, typename Device, typename Index > diff --git a/src/UnitTests/Containers/VectorTest-7.h b/src/UnitTests/Containers/VectorTest-7.h index 4843383d7..4ed937d4a 100644 --- a/src/UnitTests/Containers/VectorTest-7.h +++ b/src/UnitTests/Containers/VectorTest-7.h @@ -36,19 +36,23 @@ TYPED_TEST( VectorTest, verticalOperations ) using VectorType = typename TestFixture::VectorType; using ViewType = typename TestFixture::ViewType; using RealType = typename VectorType::RealType; + using IndexType = typename VectorType::IndexType; const int size = VECTOR_TEST_SIZE; - VectorType _u( size ), _v( size ); - ViewType u( _u ), v( _v ); + VectorType _u( size ), _v( size ), _w( size ); + ViewType u( _u ), v( _v ), w( _w ); RealType sum_( 0.0 ), absSum( 0.0 ), diffSum( 0.0 ), diffAbsSum( 0.0 ), absMin( size + 10.0 ), absMax( -size - 10.0 ), diffMin( 2 * size + 10.0 ), diffMax( - 2.0 * size - 10.0 ), - l2Norm( 0.0 ), l2NormDiff( 0.0 ); + l2Norm( 0.0 ), l2NormDiff( 0.0 ), argMinValue( size * size ), argMaxValue( -size * size ); + IndexType argMin( 0 ), argMax( 0 ); for( int i = 0; i < size; i++ ) { const RealType aux = ( RealType )( i - size / 2 ) / ( RealType ) size; + const RealType w_value = aux * aux - 5.0; u.setElement( i, aux ); v.setElement( i, -aux ); + w.setElement( i, w_value ); absMin = TNL::min( absMin, TNL::abs( aux ) ); absMax = TNL::max( absMax, TNL::abs( aux ) ); diffMin = TNL::min( diffMin, 2 * aux ); @@ -59,6 +63,14 @@ TYPED_TEST( VectorTest, verticalOperations ) diffAbsSum += TNL::abs( 2.0* aux ); l2Norm += 
aux * aux; l2NormDiff += 4.0 * aux * aux; + if( w_value < argMinValue ) { + argMinValue = w_value; + argMin = i; + } + if( w_value > argMaxValue ) { + argMaxValue = w_value; + argMax = i; + } } l2Norm = TNL::sqrt( l2Norm ); l2NormDiff = TNL::sqrt( l2NormDiff ); @@ -74,6 +86,10 @@ TYPED_TEST( VectorTest, verticalOperations ) EXPECT_NEAR( sum( abs( u - v ) ), diffAbsSum, 2.0e-5 ); EXPECT_NEAR( lpNorm( u, 2.0 ), l2Norm, 2.0e-5 ); EXPECT_NEAR( lpNorm( u - v, 2.0 ), l2NormDiff, 2.0e-5 ); + IndexType wArgMin, wArgMax; + EXPECT_NEAR( TNL::argMin( w, wArgMin ), argMinValue, 2.0e-5 ); + EXPECT_NEAR( TNL::argMax( w, wArgMax ), argMaxValue, 2.0e-5 ); + EXPECT_EQ( argMax, wArgMax ); } TYPED_TEST( VectorTest, scalarProduct ) -- GitLab From 9852ca1c5f6e15f54b86396f6abafa13fc4f0416 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 9 May 2019 15:27:54 +0200 Subject: [PATCH 39/93] Fixed reduction with argument. --- .../Algorithms/CudaReductionKernel.h | 22 +++++++++++++------ src/TNL/Containers/Algorithms/Reduction.hpp | 15 ++++++++----- .../Expressions/VerticalOperations.h | 22 ++++++++++++------- src/UnitTests/Containers/VectorTest-7.h | 1 + 4 files changed, 40 insertions(+), 20 deletions(-) diff --git a/src/TNL/Containers/Algorithms/CudaReductionKernel.h b/src/TNL/Containers/Algorithms/CudaReductionKernel.h index 70c3fbab0..ea3bbb6cd 100644 --- a/src/TNL/Containers/Algorithms/CudaReductionKernel.h +++ b/src/TNL/Containers/Algorithms/CudaReductionKernel.h @@ -188,9 +188,13 @@ CudaReductionWithArgumentKernel( const Result zero, * sequential reduction. 
*/ if( idxInput ) { - sdata[ tid ] = dataFetcher( gid ); - sidx[ tid ] = idxInput[ gid ]; - gid += gridSize; + if( gid < size ) { + sdata[ tid ] = dataFetcher( gid ); + sidx[ tid ] = idxInput[ gid ]; + gid += gridSize; + } else { + sdata[ tid ] = zero; + } while( gid + 4 * gridSize < size ) { reduction( sidx[ tid ], idxInput[ gid ], sdata[ tid ], dataFetcher( gid ) ); reduction( sidx[ tid ], idxInput[ gid + gridSize ], sdata[ tid ], dataFetcher( gid + gridSize ) ); @@ -209,9 +213,13 @@ CudaReductionWithArgumentKernel( const Result zero, } } else { - sdata[ tid ] = dataFetcher( gid ); - sidx[ tid ] = gid; - gid += gridSize; + if( gid < size ) { + sdata[ tid ] = dataFetcher( gid ); + sidx[ tid ] = gid; + gid += gridSize; + } else { + sdata[ tid ] = zero; + } while( gid + 4 * gridSize < size ) { reduction( sidx[ tid ], gid, sdata[ tid ], dataFetcher( gid ) ); reduction( sidx[ tid ], gid + gridSize, sdata[ tid ], dataFetcher( gid + gridSize ) ); @@ -407,7 +415,7 @@ struct CudaReductionKernelLauncher CudaReductionBuffer& cudaReductionBuffer = CudaReductionBuffer::getInstance(); ResultType* input = cudaReductionBuffer.template getData< ResultType >(); ResultType* output = &input[ desGridSize ]; - IndexType* idxInput = static_cast< IndexType* >( &output[ desGridSize ] ); + IndexType* idxInput = static_cast< IndexType* >( static_cast< void* >( &output[ desGridSize ] ) ); IndexType* idxOutput = &idxInput[ desGridSize ]; auto copyFetch = [=] __cuda_callable__ ( IndexType i ) { return input[ i ]; }; diff --git a/src/TNL/Containers/Algorithms/Reduction.hpp b/src/TNL/Containers/Algorithms/Reduction.hpp index 7647ff94b..82a08d100 100644 --- a/src/TNL/Containers/Algorithms/Reduction.hpp +++ b/src/TNL/Containers/Algorithms/Reduction.hpp @@ -77,7 +77,7 @@ Reduction< Devices::Cuda >:: * Reduce the data on the CUDA device. 
*/ ResultType* deviceAux1( 0 ); - IndexType reducedSize = reductionLauncher.start( + IndexType reducedSize = reductionLauncher.start( reduction, volatileReduction, dataFetcher, @@ -176,7 +176,7 @@ reduceWithArgument( const Index size, */ ResultType* deviceAux1( nullptr ); IndexType* deviceIndexes( nullptr ); - IndexType reducedSize = reductionLauncher.startWithArgument( + IndexType reducedSize = reductionLauncher.startWithArgument( reduction, volatileReduction, dataFetcher, @@ -195,7 +195,9 @@ reduceWithArgument( const Index size, * Transfer the reduced data from device to host. */ std::unique_ptr< ResultType[] > resultArray{ new ResultType[ reducedSize ] }; + std::unique_ptr< IndexType[] > indexArray{ new IndexType[ reducedSize ] }; ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray.get(), deviceAux1, reducedSize ); + ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( indexArray.get(), deviceIndexes, reducedSize ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -207,14 +209,17 @@ reduceWithArgument( const Index size, /*** * Reduce the data on the host system. */ - auto fetch = [&] ( IndexType i ) { return resultArray[ i ]; }; - const ResultType result = Reduction< Devices::Host >::reduceWithArgument( reducedSize, reduction, volatileReduction, fetch, zero ); + //auto fetch = [&] ( IndexType i ) { return resultArray[ i ]; }; + //const ResultType result = Reduction< Devices::Host >::reduceWithArgument( reducedSize, argument, reduction, volatileReduction, fetch, zero ); + for( IndexType i = 1; i < reducedSize; i++ ) + reduction( indexArray[ 0 ], indexArray[ i ], resultArray[ 0 ], resultArray[ i ] ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); std::cout << " Reduction of small data set on CPU took " << timer.getRealTime() << " sec. 
" << std::endl; #endif - return result; + argument = indexArray[ 0 ]; + return resultArray[ 0 ]; } else { /*** diff --git a/src/TNL/Containers/Expressions/VerticalOperations.h b/src/TNL/Containers/Expressions/VerticalOperations.h index 94c9781ba..6aff760f9 100644 --- a/src/TNL/Containers/Expressions/VerticalOperations.h +++ b/src/TNL/Containers/Expressions/VerticalOperations.h @@ -182,16 +182,22 @@ auto ExpressionArgMin( const Expression& expression, typename Expression::IndexT auto fetch = [=] __cuda_callable__ ( IndexType i ) { return expression[ i ]; }; auto reduction = [=] __cuda_callable__ ( IndexType& aIdx, const IndexType& bIdx, ResultType& a, const ResultType& b ) { - if( a < b ) { + if( a > b ) { a = b; aIdx = bIdx; } + else if( a == b && bIdx < aIdx ) + aIdx = bIdx; + }; - auto volatileReduction = [=] __cuda_callable__ ( volatile IndexType& aIdx, volatile IndexType& bIdx, volatile ResultType& a, volatile ResultType& b ) { - if( a < b ) { + auto volatileReduction = [=] __cuda_callable__ ( volatile IndexType& aIdx, volatile IndexType& bIdx, volatile ResultType& a, volatile ResultType& b ) { + if( a > b ) { a = b; aIdx = bIdx; } + else if( a == b && bIdx < aIdx ) + aIdx = bIdx; + }; return Algorithms::Reduction< typename Expression::DeviceType >::reduceWithArgument( expression.getSize(), arg, reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() ); } @@ -205,7 +211,7 @@ auto ExpressionMax( const Expression& expression ) -> typename std::remove_refer auto fetch = [=] __cuda_callable__ ( IndexType i ) { return expression[ i ]; }; auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = a > b ? a : b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = a > b ? 
a : b; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduce( expression.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::min() ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduce( expression.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() ); } template< typename Expression > @@ -216,22 +222,22 @@ auto ExpressionArgMax( const Expression& expression, typename Expression::IndexT auto fetch = [=] __cuda_callable__ ( IndexType i ) { return expression[ i ]; }; auto reduction = [=] __cuda_callable__ ( IndexType& aIdx, const IndexType& bIdx, ResultType& a, const ResultType& b ) { - if( a > b ) { + if( a < b ) { a = b; aIdx = bIdx; } else if( a == b && bIdx < aIdx ) aIdx = bIdx; }; - auto volatileReduction = [=] __cuda_callable__ ( volatile IndexType& aIdx, volatile IndexType& bIdx, volatile ResultType& a, volatile ResultType& b ) { - if( a > b ) { + auto volatileReduction = [=] __cuda_callable__ ( volatile IndexType& aIdx, volatile IndexType& bIdx, volatile ResultType& a, volatile ResultType& b ) { + if( a < b ) { a = b; aIdx = bIdx; } else if( a == b && bIdx < aIdx ) aIdx = bIdx; }; - return Algorithms::Reduction< typename Expression::DeviceType >::reduceWithArgument( expression.getSize(), arg, reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::min() ); + return Algorithms::Reduction< typename Expression::DeviceType >::reduceWithArgument( expression.getSize(), arg, reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() ); } template< typename Expression > diff --git a/src/UnitTests/Containers/VectorTest-7.h b/src/UnitTests/Containers/VectorTest-7.h index 4ed937d4a..842df79af 100644 --- a/src/UnitTests/Containers/VectorTest-7.h +++ b/src/UnitTests/Containers/VectorTest-7.h @@ -88,6 +88,7 @@ TYPED_TEST( VectorTest, verticalOperations ) EXPECT_NEAR( lpNorm( u - v, 2.0 ), l2NormDiff, 2.0e-5 ); IndexType 
wArgMin, wArgMax; EXPECT_NEAR( TNL::argMin( w, wArgMin ), argMinValue, 2.0e-5 ); + EXPECT_EQ( argMin, wArgMin ); EXPECT_NEAR( TNL::argMax( w, wArgMax ), argMaxValue, 2.0e-5 ); EXPECT_EQ( argMax, wArgMax ); } -- GitLab From aea06f0630b58e3cf705288d0491eee47be99b78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 10 May 2019 07:47:43 +0200 Subject: [PATCH 40/93] Refactoring prefix-sum. --- src/TNL/Containers/Algorithms/PrefixSum.h | 107 +++++ src/TNL/Containers/Algorithms/PrefixSum.hpp | 175 +++++++++ src/TNL/Containers/Algorithms/Reduction.h | 6 +- src/TNL/Containers/Algorithms/Reduction.hpp | 408 ++++++++++---------- 4 files changed, 488 insertions(+), 208 deletions(-) create mode 100644 src/TNL/Containers/Algorithms/PrefixSum.h create mode 100644 src/TNL/Containers/Algorithms/PrefixSum.hpp diff --git a/src/TNL/Containers/Algorithms/PrefixSum.h b/src/TNL/Containers/Algorithms/PrefixSum.h new file mode 100644 index 000000000..1d57da974 --- /dev/null +++ b/src/TNL/Containers/Algorithms/PrefixSum.h @@ -0,0 +1,107 @@ +/*************************************************************************** + PrefixSum.h - description + ------------------- + begin : May 9, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +// Implemented by: Tomas Oberhuber, Jakub Klinkovsky + +#pragma once + +#include +#include +#include + +namespace TNL { +namespace Containers { +namespace Algorithms { + +template< typename Device > +class PrefixSum +{ +}; + +template<> +class PrefixSum< Devices::Host > +{ + public: + template< typename Index, + typename Result, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation > + static Result + inclusive( const Index size, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const Result& zero ); + + template< typename Index, + typename Result, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation > + static Result + exclusive( const Index size, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const Result& zero ); +}; + +template<> +class PrefixSum< Devices::Cuda > +{ + public: + template< typename Index, + typename Result, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation > + static Result + inclusive( const Index size, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const Result& zero ); + + template< typename Index, + typename Result, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation > + static Result + exclusive( const Index size, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const Result& zero ); +}; + +template<> +class PrefixSum< Devices::MIC > +{ + public: + template< typename Index, + typename Result, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation > + static Result + inclusive( const Index size, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const Result& zero ); + + template< typename Index, + typename Result, + typename 
PrefixSumOperation, + typename VolatilePrefixSumOperation > + static Result + exclusive( const Index size, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const Result& zero ); +}; + +} // namespace Algorithms +} // namespace Containers +} // namespace TNL + +#include diff --git a/src/TNL/Containers/Algorithms/PrefixSum.hpp b/src/TNL/Containers/Algorithms/PrefixSum.hpp new file mode 100644 index 000000000..c0797dc0a --- /dev/null +++ b/src/TNL/Containers/Algorithms/PrefixSum.hpp @@ -0,0 +1,175 @@ +/*************************************************************************** + PrefixSum.hpp - description + ------------------- + begin : Mar 24, 2013 + copyright : (C) 2013 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +// Implemented by: Tomas Oberhuber, Jakub Klinkovsky + +#pragma once + +#include "PrefixSum.h" + +//#define CUDA_REDUCTION_PROFILING + +#include +#include +#include +#include +#include + +#ifdef CUDA_REDUCTION_PROFILING +#include +#include +#endif + +namespace TNL { +namespace Containers { +namespace Algorithms { + +/**** + * Arrays smaller than the following constant + * are reduced on CPU. The constant must not be larger + * than maximal CUDA grid size. 
+ */ +static constexpr int PrefixSum_minGpuDataSize = 256;//65536; //16384;//1024;//256; + +//// +// PrefixSum on host +template< typename Index, + typename Result, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation > +Result +PrefixSum< Devices::Host >:: +inclusive( const Index size, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const Result& zero ) +{ + using IndexType = Index; + using ResultType = Result; + +} + +template< typename Index, + typename Result, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation > +Result +PrefixSum< Devices::Host >:: +exclusive( const Index size, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const Result& zero ) +{ + using IndexType = Index; + using ResultType = Result; + +} + + + + +template< typename Index, + typename Result, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation > +Result +PrefixSum< Devices::Cuda >:: + reduce( const Index size, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const Result& zero ) +{ +#ifdef HAVE_CUDA + + using IndexType = Index; + using ResultType = Result; + + /*** + * Only fundamental and pointer types can be safely reduced on host. Complex + * objects stored on the device might contain pointers into the device memory, + * in which case reduction on host might fail. + */ + //constexpr bool can_reduce_all_on_host = std::is_fundamental< DataType1 >::value || std::is_fundamental< DataType2 >::value || std::is_pointer< DataType1 >::value || std::is_pointer< DataType2 >::value; + constexpr bool can_reduce_later_on_host = std::is_fundamental< ResultType >::value || std::is_pointer< ResultType >::value; + + #ifdef CUDA_REDUCTION_PROFILING + Timer timer; + timer.reset(); + timer.start(); + #endif + + CudaPrefixSumKernelLauncher< IndexType, ResultType > reductionLauncher( size ); + + /**** + * Reduce the data on the CUDA device. 
+ */ + ResultType* deviceAux1( 0 ); + IndexType reducedSize = reductionLauncher.start( + reduction, + volatilePrefixSum, + dataFetcher, + zero, + deviceAux1 ); + #ifdef CUDA_REDUCTION_PROFILING + timer.stop(); + std::cout << " PrefixSum on GPU to size " << reducedSize << " took " << timer.getRealTime() << " sec. " << std::endl; + timer.reset(); + timer.start(); + #endif + + if( can_reduce_later_on_host ) { + /*** + * Transfer the reduced data from device to host. + */ + std::unique_ptr< ResultType[] > resultArray{ new ResultType[ reducedSize ] }; + ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray.get(), deviceAux1, reducedSize ); + + #ifdef CUDA_REDUCTION_PROFILING + timer.stop(); + std::cout << " Transferring data to CPU took " << timer.getRealTime() << " sec. " << std::endl; + timer.reset(); + timer.start(); + #endif + + /*** + * Reduce the data on the host system. + */ + auto fetch = [&] ( IndexType i ) { return resultArray[ i ]; }; + const ResultType result = PrefixSum< Devices::Host >::reduce( reducedSize, reduction, volatilePrefixSum, fetch, zero ); + + #ifdef CUDA_REDUCTION_PROFILING + timer.stop(); + std::cout << " PrefixSum of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl; + #endif + return result; + } + else { + /*** + * Data can't be safely reduced on host, so continue with the reduction on the CUDA device. + */ + auto result = reductionLauncher.finish( reduction, volatilePrefixSum, zero ); + + #ifdef CUDA_REDUCTION_PROFILING + timer.stop(); + std::cout << " PrefixSum of small data set on GPU took " << timer.getRealTime() << " sec. 
" << std::endl; + timer.reset(); + timer.start(); + #endif + + return result; + } +#else + throw Exceptions::CudaSupportMissing(); +#endif +}; + +} // namespace Algorithms +} // namespace Containers +} // namespace TNL diff --git a/src/TNL/Containers/Algorithms/Reduction.h b/src/TNL/Containers/Algorithms/Reduction.h index 8c7af3da9..5bcfc1e88 100644 --- a/src/TNL/Containers/Algorithms/Reduction.h +++ b/src/TNL/Containers/Algorithms/Reduction.h @@ -26,7 +26,7 @@ class Reduction }; template<> -class Reduction< Devices::Cuda > +class Reduction< Devices::Host > { public: template< typename Index, @@ -56,7 +56,7 @@ class Reduction< Devices::Cuda > }; template<> -class Reduction< Devices::Host > +class Reduction< Devices::Cuda > { public: template< typename Index, @@ -120,4 +120,4 @@ class Reduction< Devices::MIC > } // namespace Containers } // namespace TNL -#include "Reduction.hpp" +#include diff --git a/src/TNL/Containers/Algorithms/Reduction.hpp b/src/TNL/Containers/Algorithms/Reduction.hpp index 82a08d100..21aaf0154 100644 --- a/src/TNL/Containers/Algorithms/Reduction.hpp +++ b/src/TNL/Containers/Algorithms/Reduction.hpp @@ -1,5 +1,5 @@ /*************************************************************************** - Reduction_impl.h - description + Reduction.hpp - description ------------------- begin : Mar 24, 2013 copyright : (C) 2013 by Tomas Oberhuber et al. 
@@ -12,12 +12,12 @@ #pragma once -#include "Reduction.h" //#define CUDA_REDUCTION_PROFILING #include #include +#include #include #include #include @@ -38,209 +38,6 @@ namespace Algorithms { */ static constexpr int Reduction_minGpuDataSize = 256;//65536; //16384;//1024;//256; - -template< typename Index, - typename Result, - typename ReductionOperation, - typename VolatileReductionOperation, - typename DataFetcher > -Result -Reduction< Devices::Cuda >:: - reduce( const Index size, - ReductionOperation& reduction, - VolatileReductionOperation& volatileReduction, - DataFetcher& dataFetcher, - const Result& zero ) -{ -#ifdef HAVE_CUDA - - using IndexType = Index; - using ResultType = Result; - - /*** - * Only fundamental and pointer types can be safely reduced on host. Complex - * objects stored on the device might contain pointers into the device memory, - * in which case reduction on host might fail. - */ - //constexpr bool can_reduce_all_on_host = std::is_fundamental< DataType1 >::value || std::is_fundamental< DataType2 >::value || std::is_pointer< DataType1 >::value || std::is_pointer< DataType2 >::value; - constexpr bool can_reduce_later_on_host = std::is_fundamental< ResultType >::value || std::is_pointer< ResultType >::value; - - #ifdef CUDA_REDUCTION_PROFILING - Timer timer; - timer.reset(); - timer.start(); - #endif - - CudaReductionKernelLauncher< IndexType, ResultType > reductionLauncher( size ); - - /**** - * Reduce the data on the CUDA device. - */ - ResultType* deviceAux1( 0 ); - IndexType reducedSize = reductionLauncher.start( - reduction, - volatileReduction, - dataFetcher, - zero, - deviceAux1 ); - #ifdef CUDA_REDUCTION_PROFILING - timer.stop(); - std::cout << " Reduction on GPU to size " << reducedSize << " took " << timer.getRealTime() << " sec. " << std::endl; - timer.reset(); - timer.start(); - #endif - - if( can_reduce_later_on_host ) { - /*** - * Transfer the reduced data from device to host. 
- */ - std::unique_ptr< ResultType[] > resultArray{ new ResultType[ reducedSize ] }; - ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray.get(), deviceAux1, reducedSize ); - - #ifdef CUDA_REDUCTION_PROFILING - timer.stop(); - std::cout << " Transferring data to CPU took " << timer.getRealTime() << " sec. " << std::endl; - timer.reset(); - timer.start(); - #endif - - /*** - * Reduce the data on the host system. - */ - auto fetch = [&] ( IndexType i ) { return resultArray[ i ]; }; - const ResultType result = Reduction< Devices::Host >::reduce( reducedSize, reduction, volatileReduction, fetch, zero ); - - #ifdef CUDA_REDUCTION_PROFILING - timer.stop(); - std::cout << " Reduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl; - #endif - return result; - } - else { - /*** - * Data can't be safely reduced on host, so continue with the reduction on the CUDA device. - */ - auto result = reductionLauncher.finish( reduction, volatileReduction, zero ); - - #ifdef CUDA_REDUCTION_PROFILING - timer.stop(); - std::cout << " Reduction of small data set on GPU took " << timer.getRealTime() << " sec. " << std::endl; - timer.reset(); - timer.start(); - #endif - - return result; - } -#else - throw Exceptions::CudaSupportMissing(); -#endif -}; - -template< typename Index, - typename Result, - typename ReductionOperation, - typename VolatileReductionOperation, - typename DataFetcher > -Result -Reduction< Devices::Cuda >:: -reduceWithArgument( const Index size, - Index& argument, - ReductionOperation& reduction, - VolatileReductionOperation& volatileReduction, - DataFetcher& dataFetcher, - const Result& zero ) -{ - #ifdef HAVE_CUDA - - using IndexType = Index; - using ResultType = Result; - - /*** - * Only fundamental and pointer types can be safely reduced on host. Complex - * objects stored on the device might contain pointers into the device memory, - * in which case reduction on host might fail. 
- */ - //constexpr bool can_reduce_all_on_host = std::is_fundamental< DataType1 >::value || std::is_fundamental< DataType2 >::value || std::is_pointer< DataType1 >::value || std::is_pointer< DataType2 >::value; - constexpr bool can_reduce_later_on_host = std::is_fundamental< ResultType >::value || std::is_pointer< ResultType >::value; - - #ifdef CUDA_REDUCTION_PROFILING - Timer timer; - timer.reset(); - timer.start(); - #endif - - CudaReductionKernelLauncher< IndexType, ResultType > reductionLauncher( size ); - - /**** - * Reduce the data on the CUDA device. - */ - ResultType* deviceAux1( nullptr ); - IndexType* deviceIndexes( nullptr ); - IndexType reducedSize = reductionLauncher.startWithArgument( - reduction, - volatileReduction, - dataFetcher, - zero, - deviceAux1, - deviceIndexes ); - #ifdef CUDA_REDUCTION_PROFILING - timer.stop(); - std::cout << " Reduction on GPU to size " << reducedSize << " took " << timer.getRealTime() << " sec. " << std::endl; - timer.reset(); - timer.start(); - #endif - - if( can_reduce_later_on_host ) { - /*** - * Transfer the reduced data from device to host. - */ - std::unique_ptr< ResultType[] > resultArray{ new ResultType[ reducedSize ] }; - std::unique_ptr< IndexType[] > indexArray{ new IndexType[ reducedSize ] }; - ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray.get(), deviceAux1, reducedSize ); - ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( indexArray.get(), deviceIndexes, reducedSize ); - - #ifdef CUDA_REDUCTION_PROFILING - timer.stop(); - std::cout << " Transferring data to CPU took " << timer.getRealTime() << " sec. " << std::endl; - timer.reset(); - timer.start(); - #endif - - /*** - * Reduce the data on the host system. 
- */ - //auto fetch = [&] ( IndexType i ) { return resultArray[ i ]; }; - //const ResultType result = Reduction< Devices::Host >::reduceWithArgument( reducedSize, argument, reduction, volatileReduction, fetch, zero ); - for( IndexType i = 1; i < reducedSize; i++ ) - reduction( indexArray[ 0 ], indexArray[ i ], resultArray[ 0 ], resultArray[ i ] ); - - #ifdef CUDA_REDUCTION_PROFILING - timer.stop(); - std::cout << " Reduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl; - #endif - argument = indexArray[ 0 ]; - return resultArray[ 0 ]; - } - else { - /*** - * Data can't be safely reduced on host, so continue with the reduction on the CUDA device. - */ - auto result = reductionLauncher.finishWithArgument( argument, reduction, volatileReduction, zero ); - - #ifdef CUDA_REDUCTION_PROFILING - timer.stop(); - std::cout << " Reduction of small data set on GPU took " << timer.getRealTime() << " sec. " << std::endl; - timer.reset(); - timer.start(); - #endif - - return result; - } -#else - throw Exceptions::CudaSupportMissing(); -#endif -} - //// // Reduction on host template< typename Index, @@ -473,6 +270,207 @@ reduceWithArgument( const Index size, #endif } +template< typename Index, + typename Result, + typename ReductionOperation, + typename VolatileReductionOperation, + typename DataFetcher > +Result +Reduction< Devices::Cuda >:: + reduce( const Index size, + ReductionOperation& reduction, + VolatileReductionOperation& volatileReduction, + DataFetcher& dataFetcher, + const Result& zero ) +{ +#ifdef HAVE_CUDA + + using IndexType = Index; + using ResultType = Result; + + /*** + * Only fundamental and pointer types can be safely reduced on host. Complex + * objects stored on the device might contain pointers into the device memory, + * in which case reduction on host might fail. 
+ */ + //constexpr bool can_reduce_all_on_host = std::is_fundamental< DataType1 >::value || std::is_fundamental< DataType2 >::value || std::is_pointer< DataType1 >::value || std::is_pointer< DataType2 >::value; + constexpr bool can_reduce_later_on_host = std::is_fundamental< ResultType >::value || std::is_pointer< ResultType >::value; + + #ifdef CUDA_REDUCTION_PROFILING + Timer timer; + timer.reset(); + timer.start(); + #endif + + CudaReductionKernelLauncher< IndexType, ResultType > reductionLauncher( size ); + + /**** + * Reduce the data on the CUDA device. + */ + ResultType* deviceAux1( 0 ); + IndexType reducedSize = reductionLauncher.start( + reduction, + volatileReduction, + dataFetcher, + zero, + deviceAux1 ); + #ifdef CUDA_REDUCTION_PROFILING + timer.stop(); + std::cout << " Reduction on GPU to size " << reducedSize << " took " << timer.getRealTime() << " sec. " << std::endl; + timer.reset(); + timer.start(); + #endif + + if( can_reduce_later_on_host ) { + /*** + * Transfer the reduced data from device to host. + */ + std::unique_ptr< ResultType[] > resultArray{ new ResultType[ reducedSize ] }; + ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray.get(), deviceAux1, reducedSize ); + + #ifdef CUDA_REDUCTION_PROFILING + timer.stop(); + std::cout << " Transferring data to CPU took " << timer.getRealTime() << " sec. " << std::endl; + timer.reset(); + timer.start(); + #endif + + /*** + * Reduce the data on the host system. + */ + auto fetch = [&] ( IndexType i ) { return resultArray[ i ]; }; + const ResultType result = Reduction< Devices::Host >::reduce( reducedSize, reduction, volatileReduction, fetch, zero ); + + #ifdef CUDA_REDUCTION_PROFILING + timer.stop(); + std::cout << " Reduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl; + #endif + return result; + } + else { + /*** + * Data can't be safely reduced on host, so continue with the reduction on the CUDA device. 
+ */ + auto result = reductionLauncher.finish( reduction, volatileReduction, zero ); + + #ifdef CUDA_REDUCTION_PROFILING + timer.stop(); + std::cout << " Reduction of small data set on GPU took " << timer.getRealTime() << " sec. " << std::endl; + timer.reset(); + timer.start(); + #endif + + return result; + } +#else + throw Exceptions::CudaSupportMissing(); +#endif +}; + +template< typename Index, + typename Result, + typename ReductionOperation, + typename VolatileReductionOperation, + typename DataFetcher > +Result +Reduction< Devices::Cuda >:: +reduceWithArgument( const Index size, + Index& argument, + ReductionOperation& reduction, + VolatileReductionOperation& volatileReduction, + DataFetcher& dataFetcher, + const Result& zero ) +{ + #ifdef HAVE_CUDA + + using IndexType = Index; + using ResultType = Result; + + /*** + * Only fundamental and pointer types can be safely reduced on host. Complex + * objects stored on the device might contain pointers into the device memory, + * in which case reduction on host might fail. + */ + //constexpr bool can_reduce_all_on_host = std::is_fundamental< DataType1 >::value || std::is_fundamental< DataType2 >::value || std::is_pointer< DataType1 >::value || std::is_pointer< DataType2 >::value; + constexpr bool can_reduce_later_on_host = std::is_fundamental< ResultType >::value || std::is_pointer< ResultType >::value; + + #ifdef CUDA_REDUCTION_PROFILING + Timer timer; + timer.reset(); + timer.start(); + #endif + + CudaReductionKernelLauncher< IndexType, ResultType > reductionLauncher( size ); + + /**** + * Reduce the data on the CUDA device. + */ + ResultType* deviceAux1( nullptr ); + IndexType* deviceIndexes( nullptr ); + IndexType reducedSize = reductionLauncher.startWithArgument( + reduction, + volatileReduction, + dataFetcher, + zero, + deviceAux1, + deviceIndexes ); + #ifdef CUDA_REDUCTION_PROFILING + timer.stop(); + std::cout << " Reduction on GPU to size " << reducedSize << " took " << timer.getRealTime() << " sec. 
" << std::endl; + timer.reset(); + timer.start(); + #endif + + if( can_reduce_later_on_host ) { + /*** + * Transfer the reduced data from device to host. + */ + std::unique_ptr< ResultType[] > resultArray{ new ResultType[ reducedSize ] }; + std::unique_ptr< IndexType[] > indexArray{ new IndexType[ reducedSize ] }; + ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray.get(), deviceAux1, reducedSize ); + ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( indexArray.get(), deviceIndexes, reducedSize ); + + #ifdef CUDA_REDUCTION_PROFILING + timer.stop(); + std::cout << " Transferring data to CPU took " << timer.getRealTime() << " sec. " << std::endl; + timer.reset(); + timer.start(); + #endif + + /*** + * Reduce the data on the host system. + */ + //auto fetch = [&] ( IndexType i ) { return resultArray[ i ]; }; + //const ResultType result = Reduction< Devices::Host >::reduceWithArgument( reducedSize, argument, reduction, volatileReduction, fetch, zero ); + for( IndexType i = 1; i < reducedSize; i++ ) + reduction( indexArray[ 0 ], indexArray[ i ], resultArray[ 0 ], resultArray[ i ] ); + + #ifdef CUDA_REDUCTION_PROFILING + timer.stop(); + std::cout << " Reduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl; + #endif + argument = indexArray[ 0 ]; + return resultArray[ 0 ]; + } + else { + /*** + * Data can't be safely reduced on host, so continue with the reduction on the CUDA device. + */ + auto result = reductionLauncher.finishWithArgument( argument, reduction, volatileReduction, zero ); + + #ifdef CUDA_REDUCTION_PROFILING + timer.stop(); + std::cout << " Reduction of small data set on GPU took " << timer.getRealTime() << " sec. 
" << std::endl; + timer.reset(); + timer.start(); + #endif + + return result; + } +#else + throw Exceptions::CudaSupportMissing(); +#endif +} } // namespace Algorithms } // namespace Containers -- GitLab From 127c08b4a67a645439357986a3a857ca29a3c1c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 11 May 2019 18:52:21 +0200 Subject: [PATCH 41/93] Refactoring prefix sum. --- .../Algorithms/CommonVectorOperations.h | 9 + .../Algorithms/CommonVectorOperations.hpp | 35 +++ src/TNL/Containers/Algorithms/PrefixSum.h | 98 +++++-- src/TNL/Containers/Algorithms/PrefixSum.hpp | 249 ++++++++++-------- src/TNL/Containers/Algorithms/Reduction.h | 2 +- .../Containers/Algorithms/VectorOperations.h | 110 -------- .../Algorithms/VectorOperationsHost_impl.h | 34 --- 7 files changed, 267 insertions(+), 270 deletions(-) diff --git a/src/TNL/Containers/Algorithms/CommonVectorOperations.h b/src/TNL/Containers/Algorithms/CommonVectorOperations.h index bd362fbab..ff6974654 100644 --- a/src/TNL/Containers/Algorithms/CommonVectorOperations.h +++ b/src/TNL/Containers/Algorithms/CommonVectorOperations.h @@ -70,6 +70,15 @@ struct CommonVectorOperations template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getScalarProduct( const Vector1& v1, const Vector2& v2 ); + template< typename Vector > + static void computePrefixSum( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end ); + + template< typename Vector > + static void computeExclusivePrefixSum( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end ); }; } // namespace Algorithms diff --git a/src/TNL/Containers/Algorithms/CommonVectorOperations.hpp b/src/TNL/Containers/Algorithms/CommonVectorOperations.hpp index ef3317cb7..18478da9f 100644 --- a/src/TNL/Containers/Algorithms/CommonVectorOperations.hpp +++ b/src/TNL/Containers/Algorithms/CommonVectorOperations.hpp @@ -12,6 
+12,7 @@ #include #include +#include namespace TNL { namespace Containers { @@ -370,6 +371,40 @@ getScalarProduct( const Vector1& v1, return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ); } +template< typename Device > +template< typename Vector > +void +CommonVectorOperations< Device >:: +computePrefixSum( Vector& v, + typename Vector::IndexType begin, + typename Vector::IndexType end ) +{ + using RealType = typename Vector::RealType; + using IndexType = typename Vector::IndexType; + + auto reduction = [=] __cuda_callable__ ( RealType& a, const RealType& b ) { a += b; }; + auto volatileReduction = [=] __cuda_callable__ ( volatile RealType& a, volatile RealType& b ) { a += b; }; + + PrefixSum< Device >::inclusive( v, begin, end, reduction, volatileReduction, ( RealType ) 0.0 ); +} + +template< typename Device > + template< typename Vector > +void +CommonVectorOperations< Device >:: +computeExclusivePrefixSum( Vector& v, + typename Vector::IndexType begin, + typename Vector::IndexType end ) +{ + using RealType = typename Vector::RealType; + using IndexType = typename Vector::IndexType; + + auto reduction = [=] __cuda_callable__ ( RealType& a, const RealType& b ) { a += b; }; + auto volatileReduction = [=] __cuda_callable__ ( volatile RealType& a, volatile RealType& b ) { a += b; }; + + PrefixSum< Device >::exclusive( v, begin, end, reduction, volatileReduction, ( RealType ) 0.0 ); +} + } // namespace Algorithms } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/PrefixSum.h b/src/TNL/Containers/Algorithms/PrefixSum.h index 1d57da974..36f01fbb1 100644 --- a/src/TNL/Containers/Algorithms/PrefixSum.h +++ b/src/TNL/Containers/Algorithms/PrefixSum.h @@ -29,50 +29,106 @@ template<> class PrefixSum< Devices::Host > { public: - template< typename Index, - typename Result, + template< typename Vector, typename PrefixSumOperation, typename VolatilePrefixSumOperation > - static Result 
- inclusive( const Index size, + static void + inclusive( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, PrefixSumOperation& reduction, VolatilePrefixSumOperation& volatilePrefixSum, - const Result& zero ); + const typename Vector::RealType& zero ); - template< typename Index, - typename Result, + template< typename Vector, typename PrefixSumOperation, typename VolatilePrefixSumOperation > - static Result - exclusive( const Index size, + static void + exclusive( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, PrefixSumOperation& reduction, VolatilePrefixSumOperation& volatilePrefixSum, - const Result& zero ); + const typename Vector::RealType& zero ); + + template< typename Vector, + typename FlagsArray, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation > + static void + inclusiveSegmented( Vector& v, + FlagsArray& f, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const typename Vector::RealType& zero ); + + template< typename Vector, + typename FlagsArray, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation > + static void + exclusiveSegmented( Vector& v, + FlagsArray& f, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const typename Vector::RealType& zero ); }; template<> class PrefixSum< Devices::Cuda > { public: - template< typename Index, - typename Result, + template< typename Vector, typename PrefixSumOperation, typename VolatilePrefixSumOperation > - static Result - inclusive( const Index size, + static void + inclusive( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, PrefixSumOperation& reduction, VolatilePrefixSumOperation& volatilePrefixSum, - const 
Result& zero ); + const typename Vector::RealType& zero ); - template< typename Index, - typename Result, + template< typename Vector, typename PrefixSumOperation, typename VolatilePrefixSumOperation > - static Result - exclusive( const Index size, + static void + exclusive( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, PrefixSumOperation& reduction, VolatilePrefixSumOperation& volatilePrefixSum, - const Result& zero ); + const typename Vector::RealType& zero ); + + template< typename Vector, + typename FlagsArray, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation > + static void + inclusiveSegmented( Vector& v, + FlagsArray& f, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const typename Vector::RealType& zero ); + + template< typename Vector, + typename FlagsArray, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation > + static void + exclusiveSegmented( Vector& v, + FlagsArray& f, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const typename Vector::RealType& zero ); }; template<> @@ -104,4 +160,4 @@ class PrefixSum< Devices::MIC > } // namespace Containers } // namespace TNL -#include +#include diff --git a/src/TNL/Containers/Algorithms/PrefixSum.hpp b/src/TNL/Containers/Algorithms/PrefixSum.hpp index c0797dc0a..6e44fa537 100644 --- a/src/TNL/Containers/Algorithms/PrefixSum.hpp +++ b/src/TNL/Containers/Algorithms/PrefixSum.hpp @@ -18,9 +18,8 @@ #include #include -#include +#include #include -#include #ifdef CUDA_REDUCTION_PROFILING #include @@ -40,135 +39,177 @@ static constexpr int PrefixSum_minGpuDataSize = 256;//65536; //16384;//1024;//25 //// // PrefixSum on host -template< typename Index, - typename Result, +template< typename Vector, 
typename PrefixSumOperation, typename VolatilePrefixSumOperation > -Result +static typename Vector::RealType PrefixSum< Devices::Host >:: -inclusive( const Index size, +inclusive( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, PrefixSumOperation& reduction, VolatilePrefixSumOperation& volatilePrefixSum, - const Result& zero ) + const typename Vector::RealType& zero ) { - using IndexType = Index; - using ResultType = Result; + using IndexType = typename Vector::IndexType; + // TODO: parallelize with OpenMP + for( IndexType i = begin + 1; i < end; i++ ) + reduction( v[ i ], v[ i - 1 ] ); } -template< typename Index, - typename Result, +template< typename Vector, typename PrefixSumOperation, typename VolatilePrefixSumOperation > -Result +static typename Vector::RealType PrefixSum< Devices::Host >:: -exclusive( const Index size, +exclusive( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, PrefixSumOperation& reduction, VolatilePrefixSumOperation& volatilePrefixSum, - const Result& zero ) + const typename Vector::RealType& zero ) { - using IndexType = Index; - using ResultType = Result; + using IndexType = typename Vector::IndexType; + using RealType = typename Vector::RealType; + + // TODO: parallelize with OpenMP + RealType aux( v[ begin ] ); + v[ begin ] = zero; + for( IndexType i = begin + 1; i < end; i++ ) + { + RealType x = v[ i ]; + v[ i ] = aux; + reduction( aux, x ); + } +} + +template< typename Vector, + typename FlagsArray, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation > +static typename Vector::RealType +PrefixSum< Devices::Host >:: +inclusiveSegmented( Vector& v, + FlagsArray& f, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const typename Vector::RealType& zero ) +{ + using IndexType = typename Vector::IndexType; + + // TODO: 
parallelize with OpenMP + for( IndexType i = begin + 1; i < end; i++ ) + if( f[ i ] ) + v[ i ] = zero; + else + reduction( v[ i ], v[ i - 1 ] ); } +template< typename Vector, + typename FlagsArray, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation > +static typename Vector::RealType +PrefixSum< Devices::Host >:: +exclusiveSegmented( Vector& v, + FlagsArray& f, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const typename Vector::RealType& zero ) +{ + using IndexType = typename Vector::IndexType; + using RealType = typename Vector::RealType; + + // TODO: parallelize with OpenMP + RealType aux( v[ begin ] ); + v[ begin ] = zero; + for( IndexType i = begin + 1; i < end; i++ ) + { + RealType x = v[ i ]; + if( f[ i ] ) + aux = zero; + v[ i ] = aux; + reduction( aux, x ); + } +} +//// +// PrefixSum on CUDA device +template< typename Vector, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation > +static typename Vector::RealType +PrefixSum< Devices::Cuda >:: +inclusive( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const typename Vector::RealType& zero ) +{ + using IndexType = typename Vector::IndexType; +} -template< typename Index, - typename Result, +template< typename Vector, typename PrefixSumOperation, typename VolatilePrefixSumOperation > -Result +static typename Vector::RealType PrefixSum< Devices::Cuda >:: - reduce( const Index size, +exclusive( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, PrefixSumOperation& reduction, VolatilePrefixSumOperation& volatilePrefixSum, - const Result& zero ) + const typename Vector::RealType& zero ) { -#ifdef HAVE_CUDA - - using IndexType = Index; - using ResultType = Result; - - /*** - * Only fundamental 
and pointer types can be safely reduced on host. Complex - * objects stored on the device might contain pointers into the device memory, - * in which case reduction on host might fail. - */ - //constexpr bool can_reduce_all_on_host = std::is_fundamental< DataType1 >::value || std::is_fundamental< DataType2 >::value || std::is_pointer< DataType1 >::value || std::is_pointer< DataType2 >::value; - constexpr bool can_reduce_later_on_host = std::is_fundamental< ResultType >::value || std::is_pointer< ResultType >::value; - - #ifdef CUDA_REDUCTION_PROFILING - Timer timer; - timer.reset(); - timer.start(); - #endif - - CudaPrefixSumKernelLauncher< IndexType, ResultType > reductionLauncher( size ); - - /**** - * Reduce the data on the CUDA device. - */ - ResultType* deviceAux1( 0 ); - IndexType reducedSize = reductionLauncher.start( - reduction, - volatilePrefixSum, - dataFetcher, - zero, - deviceAux1 ); - #ifdef CUDA_REDUCTION_PROFILING - timer.stop(); - std::cout << " PrefixSum on GPU to size " << reducedSize << " took " << timer.getRealTime() << " sec. " << std::endl; - timer.reset(); - timer.start(); - #endif - - if( can_reduce_later_on_host ) { - /*** - * Transfer the reduced data from device to host. - */ - std::unique_ptr< ResultType[] > resultArray{ new ResultType[ reducedSize ] }; - ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray.get(), deviceAux1, reducedSize ); - - #ifdef CUDA_REDUCTION_PROFILING - timer.stop(); - std::cout << " Transferring data to CPU took " << timer.getRealTime() << " sec. " << std::endl; - timer.reset(); - timer.start(); - #endif - - /*** - * Reduce the data on the host system. - */ - auto fetch = [&] ( IndexType i ) { return resultArray[ i ]; }; - const ResultType result = PrefixSum< Devices::Host >::reduce( reducedSize, reduction, volatilePrefixSum, fetch, zero ); - - #ifdef CUDA_REDUCTION_PROFILING - timer.stop(); - std::cout << " PrefixSum of small data set on CPU took " << timer.getRealTime() << " sec. 
" << std::endl; - #endif - return result; - } - else { - /*** - * Data can't be safely reduced on host, so continue with the reduction on the CUDA device. - */ - auto result = reductionLauncher.finish( reduction, volatilePrefixSum, zero ); - - #ifdef CUDA_REDUCTION_PROFILING - timer.stop(); - std::cout << " PrefixSum of small data set on GPU took " << timer.getRealTime() << " sec. " << std::endl; - timer.reset(); - timer.start(); - #endif - - return result; - } -#else - throw Exceptions::CudaSupportMissing(); -#endif -}; + using IndexType = typename Vector::IndexType; + using RealType = typename Vector::RealType; + +} + +template< typename Vector, + typename FlagsArray, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation > +static typename Vector::RealType +PrefixSum< Devices::Cuda >:: +inclusiveSegmented( Vector& v, + FlagsArray& f, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const typename Vector::RealType& zero ) +{ + using IndexType = typename Vector::IndexType; + +} + +template< typename Vector, + typename FlagsArray, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation > +static typename Vector::RealType +PrefixSum< Devices::Cuda >:: +exclusiveSegmented( Vector& v, + FlagsArray& f, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const typename Vector::RealType& zero ) +{ + using IndexType = typename Vector::IndexType; + using RealType = typename Vector::RealType; + +} } // namespace Algorithms } // namespace Containers diff --git a/src/TNL/Containers/Algorithms/Reduction.h b/src/TNL/Containers/Algorithms/Reduction.h index 5bcfc1e88..975d306b5 100644 --- a/src/TNL/Containers/Algorithms/Reduction.h +++ b/src/TNL/Containers/Algorithms/Reduction.h @@ -120,4 +120,4 @@ class Reduction< 
Devices::MIC > } // namespace Containers } // namespace TNL -#include +#include diff --git a/src/TNL/Containers/Algorithms/VectorOperations.h b/src/TNL/Containers/Algorithms/VectorOperations.h index 1c89dbca0..e7e46a9f9 100644 --- a/src/TNL/Containers/Algorithms/VectorOperations.h +++ b/src/TNL/Containers/Algorithms/VectorOperations.h @@ -37,55 +37,6 @@ public: const typename Vector::RealType& value, const Scalar thisElementMultiplicator ); - /*template< typename Vector, typename ResultType = typename Vector::RealType > - static ResultType getVectorMax( const Vector& v ); - - template< typename Vector, typename ResultType = typename Vector::RealType > - static ResultType getVectorMin( const Vector& v ); - - template< typename Vector, typename ResultType = typename Vector::RealType > - static ResultType getVectorAbsMax( const Vector& v ); - - template< typename Vector, typename ResultType = typename Vector::RealType > - static ResultType getVectorAbsMin( const Vector& v ); - - template< typename Vector, typename ResultType = typename Vector::RealType > - static ResultType getVectorL1Norm( const Vector& v ); - - template< typename Vector, typename ResultType = typename Vector::RealType > - static ResultType getVectorL2Norm( const Vector& v ); - - template< typename Vector, typename ResultType = typename Vector::RealType, typename Scalar > - static ResultType getVectorLpNorm( const Vector& v, const Scalar p ); - - template< typename Vector, typename ResultType = typename Vector::RealType > - static ResultType getVectorSum( const Vector& v ); - - template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > - static ResultType getVectorDifferenceMax( const Vector1& v1, const Vector2& v2 ); - - template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > - static ResultType getVectorDifferenceMin( const Vector1& v1, const Vector2& v2 ); - - template< typename Vector1, typename Vector2, typename 
ResultType = typename Vector1::RealType > - static ResultType getVectorDifferenceAbsMax( const Vector1& v1, const Vector2& v2 ); - - template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > - static ResultType getVectorDifferenceAbsMin( const Vector1& v1, const Vector2& v2 ); - - template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > - static ResultType getVectorDifferenceL1Norm( const Vector1& v1, const Vector2& v2 ); - - template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > - static ResultType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 ); - - template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Scalar > - static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Scalar p ); - - template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > - static ResultType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 ); - */ - template< typename Vector, typename Scalar > static void vectorScalarMultiplication( Vector& v, Scalar alpha ); @@ -106,15 +57,6 @@ public: const Scalar2 multiplicator2, const Scalar3 thisMultiplicator = 1.0 ); - template< typename Vector > - static void computePrefixSum( Vector& v, - const typename Vector::IndexType begin, - const typename Vector::IndexType end ); - - template< typename Vector > - static void computeExclusivePrefixSum( Vector& v, - const typename Vector::IndexType begin, - const typename Vector::IndexType end ); }; template<> @@ -132,61 +74,9 @@ public: const typename Vector::RealType& value, const Scalar thisElementMultiplicator ); - /*template< typename Vector, typename ResultType = typename Vector::RealType > - static ResultType getVectorMax( const Vector& v ); - - template< typename Vector, typename ResultType = typename Vector::RealType > - static ResultType 
getVectorMin( const Vector& v ); - - template< typename Vector, typename ResultType = typename Vector::RealType > - static ResultType getVectorAbsMax( const Vector& v ); - - template< typename Vector, typename ResultType = typename Vector::RealType > - static ResultType getVectorAbsMin( const Vector& v ); - - template< typename Vector, typename ResultType = typename Vector::RealType > - static ResultType getVectorL1Norm( const Vector& v ); - - template< typename Vector, typename ResultType = typename Vector::RealType > - static ResultType getVectorL2Norm( const Vector& v ); - - template< typename Vector, typename ResultType = typename Vector::RealType, typename Scalar > - static ResultType getVectorLpNorm( const Vector& v, const Scalar p ); - - template< typename Vector, typename ResultType = typename Vector::RealType > - static ResultType getVectorSum( const Vector& v ); - - template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > - static ResultType getVectorDifferenceMax( const Vector1& v1, const Vector2& v2 ); - - template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > - static ResultType getVectorDifferenceMin( const Vector1& v1, const Vector2& v2 ); - - template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > - static ResultType getVectorDifferenceAbsMax( const Vector1& v1, const Vector2& v2 ); - - template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > - static ResultType getVectorDifferenceAbsMin( const Vector1& v1, const Vector2& v2 ); - - template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > - static ResultType getVectorDifferenceL1Norm( const Vector1& v1, const Vector2& v2 ); - - template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > - static ResultType getVectorDifferenceL2Norm( const Vector1& v1, const 
Vector2& v2 ); - - template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Scalar > - static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Scalar p ); - - template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > - static ResultType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 ); - */ - template< typename Vector, typename Scalar > static void vectorScalarMultiplication( Vector& v, const Scalar alpha ); - /*template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > - static ResultType getScalarProduct( const Vector1& v1, const Vector2& v2 );*/ - template< typename Vector1, typename Vector2, typename Scalar1, typename Scalar2 > static void addVector( Vector1& y, const Vector2& x, diff --git a/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h b/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h index 47a03c636..7bee6e84f 100644 --- a/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h +++ b/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h @@ -139,40 +139,6 @@ addVectors( Vector1& v, v[ i ] = thisMultiplicator * v[ i ] + multiplicator1 * v1[ i ] + multiplicator2 * v2[ i ]; } -template< typename Vector > -void -VectorOperations< Devices::Host >:: -computePrefixSum( Vector& v, - typename Vector::IndexType begin, - typename Vector::IndexType end ) -{ - typedef typename Vector::IndexType Index; - - // TODO: parallelize with OpenMP - for( Index i = begin + 1; i < end; i++ ) - v[ i ] += v[ i - 1 ]; -} - -template< typename Vector > -void -VectorOperations< Devices::Host >:: -computeExclusivePrefixSum( Vector& v, - typename Vector::IndexType begin, - typename Vector::IndexType end ) -{ - typedef typename Vector::IndexType Index; - typedef typename Vector::RealType Real; - - // TODO: parallelize with OpenMP - Real aux( v[ begin ] ); - v[ begin ] = 0.0; - for( Index i = 
begin + 1; i < end; i++ ) - { - Real x = v[ i ]; - v[ i ] = aux; - aux += x; - } -} } // namespace Algorithms } // namespace Containers -- GitLab From d804c3a26470e8ef5a7bf9226fe49abda682d45c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 16 May 2019 06:50:38 +0200 Subject: [PATCH 42/93] Refactoring segmented prefix-sum. --- .../Algorithms/CommonVectorOperations.h | 13 +++++++ .../Algorithms/CommonVectorOperations.hpp | 37 +++++++++++++++++++ src/TNL/Containers/Algorithms/PrefixSum.hpp | 16 ++++---- src/TNL/Containers/ArrayView.h | 2 +- src/TNL/Containers/ArrayView.hpp | 2 +- src/UnitTests/Containers/VectorTest-4.h | 12 +++++- 6 files changed, 70 insertions(+), 12 deletions(-) diff --git a/src/TNL/Containers/Algorithms/CommonVectorOperations.h b/src/TNL/Containers/Algorithms/CommonVectorOperations.h index ff6974654..42b67a981 100644 --- a/src/TNL/Containers/Algorithms/CommonVectorOperations.h +++ b/src/TNL/Containers/Algorithms/CommonVectorOperations.h @@ -79,6 +79,19 @@ struct CommonVectorOperations static void computeExclusivePrefixSum( Vector& v, const typename Vector::IndexType begin, const typename Vector::IndexType end ); + + template< typename Vector, typename Flags > + static void computeSegmentedPrefixSum( Vector& v, + Flags& f, + const typename Vector::IndexType begin, + const typename Vector::IndexType end ); + + template< typename Vector, typename Flags > + static void computeExclusiveSegmentedPrefixSum( Vector& v, + Flags& f, + const typename Vector::IndexType begin, + const typename Vector::IndexType end ); + }; } // namespace Algorithms diff --git a/src/TNL/Containers/Algorithms/CommonVectorOperations.hpp b/src/TNL/Containers/Algorithms/CommonVectorOperations.hpp index 18478da9f..cb5cc8b08 100644 --- a/src/TNL/Containers/Algorithms/CommonVectorOperations.hpp +++ b/src/TNL/Containers/Algorithms/CommonVectorOperations.hpp @@ -405,6 +405,43 @@ computeExclusivePrefixSum( Vector& v, PrefixSum< Device >::exclusive( v, 
begin, end, reduction, volatileReduction, ( RealType ) 0.0 ); } +template< typename Device > + template< typename Vector, typename Flags > +void +CommonVectorOperations< Device >:: +computeSegmentedPrefixSum( Vector& v, + Flags& f, + typename Vector::IndexType begin, + typename Vector::IndexType end ) +{ + using RealType = typename Vector::RealType; + using IndexType = typename Vector::IndexType; + + auto reduction = [=] __cuda_callable__ ( RealType& a, const RealType& b ) { a += b; }; + auto volatileReduction = [=] __cuda_callable__ ( volatile RealType& a, volatile RealType& b ) { a += b; }; + + PrefixSum< Device >::inclusive( v, begin, end, reduction, volatileReduction, ( RealType ) 0.0 ); +} + +template< typename Device > + template< typename Vector, typename Flags > +void +CommonVectorOperations< Device >:: +computeExclusiveSegmentedPrefixSum( Vector& v, + Flags& f, + typename Vector::IndexType begin, + typename Vector::IndexType end ) +{ + using RealType = typename Vector::RealType; + using IndexType = typename Vector::IndexType; + + auto reduction = [=] __cuda_callable__ ( RealType& a, const RealType& b ) { a += b; }; + auto volatileReduction = [=] __cuda_callable__ ( volatile RealType& a, volatile RealType& b ) { a += b; }; + + PrefixSum< Device >::exclusive( v, begin, end, reduction, volatileReduction, ( RealType ) 0.0 ); +} + + } // namespace Algorithms } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/PrefixSum.hpp b/src/TNL/Containers/Algorithms/PrefixSum.hpp index 6e44fa537..354467be9 100644 --- a/src/TNL/Containers/Algorithms/PrefixSum.hpp +++ b/src/TNL/Containers/Algorithms/PrefixSum.hpp @@ -42,7 +42,7 @@ static constexpr int PrefixSum_minGpuDataSize = 256;//65536; //16384;//1024;//25 template< typename Vector, typename PrefixSumOperation, typename VolatilePrefixSumOperation > -static typename Vector::RealType +void PrefixSum< Devices::Host >:: inclusive( Vector& v, const typename Vector::IndexType begin, @@ -61,7 
+61,7 @@ inclusive( Vector& v, template< typename Vector, typename PrefixSumOperation, typename VolatilePrefixSumOperation > -static typename Vector::RealType +void PrefixSum< Devices::Host >:: exclusive( Vector& v, const typename Vector::IndexType begin, @@ -88,7 +88,7 @@ template< typename Vector, typename FlagsArray, typename PrefixSumOperation, typename VolatilePrefixSumOperation > -static typename Vector::RealType +void PrefixSum< Devices::Host >:: inclusiveSegmented( Vector& v, FlagsArray& f, @@ -113,7 +113,7 @@ template< typename Vector, typename FlagsArray, typename PrefixSumOperation, typename VolatilePrefixSumOperation > -static typename Vector::RealType +void PrefixSum< Devices::Host >:: exclusiveSegmented( Vector& v, FlagsArray& f, @@ -144,7 +144,7 @@ exclusiveSegmented( Vector& v, template< typename Vector, typename PrefixSumOperation, typename VolatilePrefixSumOperation > -static typename Vector::RealType +void PrefixSum< Devices::Cuda >:: inclusive( Vector& v, const typename Vector::IndexType begin, @@ -160,7 +160,7 @@ inclusive( Vector& v, template< typename Vector, typename PrefixSumOperation, typename VolatilePrefixSumOperation > -static typename Vector::RealType +void PrefixSum< Devices::Cuda >:: exclusive( Vector& v, const typename Vector::IndexType begin, @@ -178,7 +178,7 @@ template< typename Vector, typename FlagsArray, typename PrefixSumOperation, typename VolatilePrefixSumOperation > -static typename Vector::RealType +void PrefixSum< Devices::Cuda >:: inclusiveSegmented( Vector& v, FlagsArray& f, @@ -196,7 +196,7 @@ template< typename Vector, typename FlagsArray, typename PrefixSumOperation, typename VolatilePrefixSumOperation > -static typename Vector::RealType +void PrefixSum< Devices::Cuda >:: exclusiveSegmented( Vector& v, FlagsArray& f, diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h index 04084c316..2164ece7d 100644 --- a/src/TNL/Containers/ArrayView.h +++ b/src/TNL/Containers/ArrayView.h @@ -377,7 +377,7 
@@ public: * \param v Reference to a value. */ template< typename Function > - void evaluate( Function& f, + void evaluate( const Function& f, const Index begin = 0, Index end = -1 ); diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index f3a0b4ecf..552efc04b 100644 --- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -306,7 +306,7 @@ template< typename Value, typename Index > template< typename Function > void ArrayView< Value, Device, Index >:: -evaluate( Function& f, const Index begin, Index end ) +evaluate( const Function& f, const Index begin, Index end ) { TNL_ASSERT_TRUE( this->getData(), "Attempted to set a value of an empty array view." ); diff --git a/src/UnitTests/Containers/VectorTest-4.h b/src/UnitTests/Containers/VectorTest-4.h index b3fcd8d68..2bef96b7e 100644 --- a/src/UnitTests/Containers/VectorTest-4.h +++ b/src/UnitTests/Containers/VectorTest-4.h @@ -127,12 +127,20 @@ TYPED_TEST( VectorTest, prefixSum ) using VectorType = typename TestFixture::VectorType; using VectorOperations = typename TestFixture::VectorOperations; using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + using DeviceType = typename VectorType::DeviceType; + using IndexType = typename VectorType::IndexType; + using FlagsArrayType = Array< bool, DeviceType, IndexType >; + using FlagsViewType = ArrayView< bool, DeviceType, IndexType >; const int size = VECTOR_TEST_SIZE; - VectorType v; - v.setSize( size ); + VectorType v( size ); ViewType v_view( v ); + FlagsArrayType flags( size ); + FlagsViewType flags_view( flags ); + flags_view.evaluate( [] __cuda_callable__ ( IndexType i ) { return ( i % 5 ) == 0; } ); + setConstantSequence( v, 1 ); v.computePrefixSum(); for( int i = 0; i < size; i++ ) -- GitLab From 8246797aa622d3debc154099772f2e5a8c3d121b Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Fri, 17 May 2019 15:51:42 +0200 Subject: [PATCH 43/93] [WIP] Adding segmented 
prefix sum test. --- src/UnitTests/Containers/VectorTest-4.h | 70 ++++++++++++++++++++----- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/src/UnitTests/Containers/VectorTest-4.h b/src/UnitTests/Containers/VectorTest-4.h index 2bef96b7e..0bab92f3a 100644 --- a/src/UnitTests/Containers/VectorTest-4.h +++ b/src/UnitTests/Containers/VectorTest-4.h @@ -130,23 +130,16 @@ TYPED_TEST( VectorTest, prefixSum ) using RealType = typename VectorType::RealType; using DeviceType = typename VectorType::DeviceType; using IndexType = typename VectorType::IndexType; - using FlagsArrayType = Array< bool, DeviceType, IndexType >; - using FlagsViewType = ArrayView< bool, DeviceType, IndexType >; - const int size = VECTOR_TEST_SIZE; VectorType v( size ); ViewType v_view( v ); - FlagsArrayType flags( size ); - FlagsViewType flags_view( flags ); - flags_view.evaluate( [] __cuda_callable__ ( IndexType i ) { return ( i % 5 ) == 0; } ); - - setConstantSequence( v, 1 ); - v.computePrefixSum(); + v = 1; + v.computeSegmentedPrefixSum(); for( int i = 0; i < size; i++ ) - EXPECT_EQ( v.getElement( i ), i + 1 ); + EXPECT_EQ( v.getElement( i ), 0 ); - v.setValue( 0 ); + v = 0; v.computePrefixSum(); for( int i = 0; i < size; i++ ) EXPECT_EQ( v.getElement( i ), 0 ); @@ -161,7 +154,7 @@ TYPED_TEST( VectorTest, prefixSum ) for( int i = 0; i < size; i++ ) EXPECT_EQ( v.getElement( i ), i + 1 ); - v.setValue( 0 ); + v = 0; v_view.computePrefixSum(); for( int i = 0; i < size; i++ ) EXPECT_EQ( v.getElement( i ), 0 ); @@ -214,6 +207,59 @@ TYPED_TEST( VectorTest, exclusivePrefixSum ) EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i - 1 ); } +TYPED_TEST( VectorTest, segmentedPrefixSum ) +{ + using VectorType = typename TestFixture::VectorType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + using DeviceType = typename VectorType::DeviceType; + using IndexType = 
typename VectorType::IndexType; + using FlagsArrayType = Array< bool, DeviceType, IndexType >; + using FlagsViewType = ArrayView< bool, DeviceType, IndexType >; + const int size = VECTOR_TEST_SIZE; + + VectorType v( size ); + ViewType v_view( v ); + + FlagsArrayType flags( size ), flags_copy( size ); + FlagsViewType flags_view( flags ); + flags_view.evaluate( [] __cuda_callable__ ( IndexType i ) { return ( i % 5 ) == 0; } ); + flags_copy = flags_view; + + v = 1; + v.computeSegmentedPrefixSum(); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), 0 ); + + + v.setValue( 0 ); + v.computePrefixSum(); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), 0 ); + + setLinearSequence( v ); + v.computePrefixSum(); + for( int i = 1; i < size; i++ ) + EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i ); + + setConstantSequence( v, 1 ); + v_view.computePrefixSum(); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), i + 1 ); + + v.setValue( 0 ); + v_view.computePrefixSum(); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), 0 ); + + setLinearSequence( v ); + v_view.computePrefixSum(); + for( int i = 1; i < size; i++ ) + EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i ); +} + + TYPED_TEST( VectorTest, abs ) { using VectorType = typename TestFixture::VectorType; -- GitLab From b243d46a4ecdcbd7794d9979f4b56ad226c10f87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 19 May 2019 11:38:05 +0200 Subject: [PATCH 44/93] Adding prefix sum test. 
--- src/UnitTests/Containers/VectorTest-4.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/UnitTests/Containers/VectorTest-4.h b/src/UnitTests/Containers/VectorTest-4.h index 0bab92f3a..1918eb0f3 100644 --- a/src/UnitTests/Containers/VectorTest-4.h +++ b/src/UnitTests/Containers/VectorTest-4.h @@ -130,15 +130,11 @@ TYPED_TEST( VectorTest, prefixSum ) using RealType = typename VectorType::RealType; using DeviceType = typename VectorType::DeviceType; using IndexType = typename VectorType::IndexType; + const int size = VECTOR_TEST_SIZE; VectorType v( size ); ViewType v_view( v ); - v = 1; - v.computeSegmentedPrefixSum(); - for( int i = 0; i < size; i++ ) - EXPECT_EQ( v.getElement( i ), 0 ); - v = 0; v.computePrefixSum(); for( int i = 0; i < size; i++ ) @@ -222,7 +218,7 @@ TYPED_TEST( VectorTest, segmentedPrefixSum ) VectorType v( size ); ViewType v_view( v ); - FlagsArrayType flags( size ), flags_copy( size ); + /*FlagsArrayType flags( size ), flags_copy( size ); FlagsViewType flags_view( flags ); flags_view.evaluate( [] __cuda_callable__ ( IndexType i ) { return ( i % 5 ) == 0; } ); flags_copy = flags_view; @@ -231,7 +227,7 @@ TYPED_TEST( VectorTest, segmentedPrefixSum ) v.computeSegmentedPrefixSum(); for( int i = 0; i < size; i++ ) EXPECT_EQ( v.getElement( i ), 0 ); - +*/ v.setValue( 0 ); v.computePrefixSum(); -- GitLab From bd3059409cab8a160cd4b9bb4b680067b30cfb76 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Mon, 20 May 2019 11:42:23 +0200 Subject: [PATCH 45/93] Adding segmented prefix sum. 
i --- src/TNL/Containers/Vector.h | 14 +++++++- src/TNL/Containers/Vector.hpp | 48 ++++++++++++++++++++++++++++ src/TNL/Containers/VectorView.h | 12 +++++++ src/TNL/Containers/VectorView_impl.h | 48 ++++++++++++++++++++++++++++ 4 files changed, 121 insertions(+), 1 deletion(-) diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h index 084055607..336b4121e 100644 --- a/src/TNL/Containers/Vector.h +++ b/src/TNL/Containers/Vector.h @@ -404,7 +404,19 @@ public: * \param begin Index of the element in this vector which to begin with. * \param end Index of the element in this vector which to end with. */ - void computeExclusivePrefixSum( const IndexType begin, const IndexType end ); + void computeSegmentedExclusivePrefixSum( const IndexType begin, const IndexType end ); + + template< typename FlagsArray > + void computeSegmentedPrefixSum( FlagsArray& flags ); + + template< typename FlagsArray > + void computeSegmentedPrefixSum( FlagsArray& flags, const IndexType begin, const IndexType end ); + + template< typename FlagsArray > + void computeSegmentedExclusivePrefixSum( FlagsArray& flags ); + + template< typename FlagsArray > + void computeSegmentedExclusivePrefixSum( FlagsArray& flags, const IndexType begin, const IndexType end ); }; } // namespace Containers diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index 26b34d803..79ae312e5 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -516,5 +516,53 @@ computeExclusivePrefixSum( const IndexType begin, Algorithms::VectorOperations< Device >::computeExclusivePrefixSum( *this, begin, end ); } +template< typename Real, + typename Device, + typename Index > + template< typename FlagsArray > +void +Vector< Real, Device, Index >:: +computeSegmentedPrefixSum( FlagsArray& flags ) +{ + Algorithms::VectorOperations< Device >::computePrefixSum( *this, flags, 0, this->getSize() ); +} + +template< typename Real, + typename Device, + typename Index > + template< 
typename FlagsArray > +void +Vector< Real, Device, Index >:: +computeSegmentedPrefixSum( FlagsArray& flags, + const IndexType begin, + const IndexType end ) +{ + Algorithms::VectorOperations< Device >::computePrefixSum( *this, flags, begin, end ); +} + +template< typename Real, + typename Device, + typename Index > + template< typename FlagsArray > +void +Vector< Real, Device, Index >:: +computeSegmentedExclusivePrefixSum( FlagsArray& flags ) +{ + Algorithms::VectorOperations< Device >::computeExclusivePrefixSum( *this, flags, 0, this->getSize() ); +} + +template< typename Real, + typename Device, + typename Index > + template< typename FlagsArray > +void +Vector< Real, Device, Index >:: +computeSegmentedExclusivePrefixSum( FlagsArray& flags, + const IndexType begin, + const IndexType end ) +{ + Algorithms::VectorOperations< Device >::computeExclusivePrefixSum( *this, flags, begin, end ); +} + } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index 59dc29f83..5ed0b6899 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -195,6 +195,18 @@ public: void computeExclusivePrefixSum(); void computeExclusivePrefixSum( IndexType begin, IndexType end ); + + template< typename FlagsArray > + void computeSegmentedPrefixSum( FlagsArray& flags ); + + template< typename FlagsArray > + void computeSegmentedPrefixSum( FlagsArray& flags, const IndexType begin, const IndexType end ); + + template< typename FlagsArray > + void computeSegmentedExclusivePrefixSum( FlagsArray& flags ); + + template< typename FlagsArray > + void computeSegmentedExclusivePrefixSum( FlagsArray& flags, const IndexType begin, const IndexType end ); }; } // namespace Containers diff --git a/src/TNL/Containers/VectorView_impl.h b/src/TNL/Containers/VectorView_impl.h index 2433721b7..f3dd2992d 100644 --- a/src/TNL/Containers/VectorView_impl.h +++ b/src/TNL/Containers/VectorView_impl.h @@ -429,5 +429,53 @@ 
computeExclusivePrefixSum( IndexType begin, IndexType end ) Algorithms::VectorOperations< Device >::computeExclusivePrefixSum( *this, begin, end ); } +template< typename Real, + typename Device, + typename Index > + template< typename FlagsArray > +void +VectorView< Real, Device, Index >:: +computeSegmentedPrefixSum( FlagsArray& flags ) +{ + Algorithms::VectorOperations< Device >::computePrefixSum( *this, flags, 0, this->getSize() ); +} + +template< typename Real, + typename Device, + typename Index > + template< typename FlagsArray > +void +VectorView< Real, Device, Index >:: +computeSegmentedPrefixSum( FlagsArray& flags, + const IndexType begin, + const IndexType end ) +{ + Algorithms::VectorOperations< Device >::computePrefixSum( *this, flags, begin, end ); +} + +template< typename Real, + typename Device, + typename Index > + template< typename FlagsArray > +void +VectorView< Real, Device, Index >:: +computeSegmentedExclusivePrefixSum( FlagsArray& flags ) +{ + Algorithms::VectorOperations< Device >::computeExclusivePrefixSum( *this, flags, 0, this->getSize() ); +} + +template< typename Real, + typename Device, + typename Index > + template< typename FlagsArray > +void +VectorView< Real, Device, Index >:: +computeSegmentedExclusivePrefixSum( FlagsArray& flags, + const IndexType begin, + const IndexType end ) +{ + Algorithms::VectorOperations< Device >::computeExclusivePrefixSum( *this, flags, begin, end ); +} + } // namespace Containers } // namespace TNL -- GitLab From 8b1b6cba5c9bff441a3efa5312426303f8e35793 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Mon, 20 May 2019 14:00:30 +0200 Subject: [PATCH 46/93] Small --- src/TNL/Containers/Vector.h | 2 +- src/TNL/Matrices/Sparse_impl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h index 336b4121e..2aa5e60ff 100644 --- a/src/TNL/Containers/Vector.h +++ b/src/TNL/Containers/Vector.h @@ -404,7 +404,7 @@ public: * \param begin 
Index of the element in this vector which to begin with. * \param end Index of the element in this vector which to end with. */ - void computeSegmentedExclusivePrefixSum( const IndexType begin, const IndexType end ); + void computeExclusivePrefixSum( const IndexType begin, const IndexType end ); template< typename FlagsArray > void computeSegmentedPrefixSum( FlagsArray& flags ); diff --git a/src/TNL/Matrices/Sparse_impl.h b/src/TNL/Matrices/Sparse_impl.h index 18bc4ea71..588668175 100644 --- a/src/TNL/Matrices/Sparse_impl.h +++ b/src/TNL/Matrices/Sparse_impl.h @@ -52,7 +52,7 @@ Index Sparse< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const IndexType nonzeroElements( 0 ); for( IndexType i = 0; i < this->values.getSize(); i++ ) if( this->columnIndexes.getElement( i ) != this-> columns && - this->values.getElement( i ) != 0.0 ) + this->values.getElement( i ) != RealType( 0 ) ) nonzeroElements++; return nonzeroElements; } -- GitLab From 341117ffd26af7b7a918f97c2f4b373ebd2b1964 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 21 May 2019 11:42:07 +0200 Subject: [PATCH 47/93] Fixed segmented prefix sum on CPU. 
--- .../Algorithms/CommonVectorOperations.hpp | 4 +- src/TNL/Containers/Algorithms/PrefixSum.hpp | 7 +-- src/TNL/Containers/Vector.hpp | 8 +-- src/TNL/Containers/VectorView.h | 4 +- src/TNL/Containers/VectorView_impl.h | 8 +-- src/UnitTests/Containers/VectorTest-4.h | 57 ++++++++++++------- 6 files changed, 49 insertions(+), 39 deletions(-) diff --git a/src/TNL/Containers/Algorithms/CommonVectorOperations.hpp b/src/TNL/Containers/Algorithms/CommonVectorOperations.hpp index cb5cc8b08..62630863b 100644 --- a/src/TNL/Containers/Algorithms/CommonVectorOperations.hpp +++ b/src/TNL/Containers/Algorithms/CommonVectorOperations.hpp @@ -420,7 +420,7 @@ computeSegmentedPrefixSum( Vector& v, auto reduction = [=] __cuda_callable__ ( RealType& a, const RealType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile RealType& a, volatile RealType& b ) { a += b; }; - PrefixSum< Device >::inclusive( v, begin, end, reduction, volatileReduction, ( RealType ) 0.0 ); + PrefixSum< Device >::inclusiveSegmented( v, f, begin, end, reduction, volatileReduction, ( RealType ) 0.0 ); } template< typename Device > @@ -438,7 +438,7 @@ computeExclusiveSegmentedPrefixSum( Vector& v, auto reduction = [=] __cuda_callable__ ( RealType& a, const RealType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile RealType& a, volatile RealType& b ) { a += b; }; - PrefixSum< Device >::exclusive( v, begin, end, reduction, volatileReduction, ( RealType ) 0.0 ); + PrefixSum< Device >::exclusiveSegmented( v, f, begin, end, reduction, volatileReduction, ( RealType ) 0.0 ); } diff --git a/src/TNL/Containers/Algorithms/PrefixSum.hpp b/src/TNL/Containers/Algorithms/PrefixSum.hpp index 354467be9..65731ca49 100644 --- a/src/TNL/Containers/Algorithms/PrefixSum.hpp +++ b/src/TNL/Containers/Algorithms/PrefixSum.hpp @@ -99,14 +99,11 @@ inclusiveSegmented( Vector& v, const typename Vector::RealType& zero ) { using IndexType = typename Vector::IndexType; - + // TODO: parallelize 
with OpenMP for( IndexType i = begin + 1; i < end; i++ ) - if( f[ i ] ) - v[ i ] = zero; - else + if( ! f[ i ] ) reduction( v[ i ], v[ i - 1 ] ); - } template< typename Vector, diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index 79ae312e5..a8a6edc1e 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -524,7 +524,7 @@ void Vector< Real, Device, Index >:: computeSegmentedPrefixSum( FlagsArray& flags ) { - Algorithms::VectorOperations< Device >::computePrefixSum( *this, flags, 0, this->getSize() ); + Algorithms::VectorOperations< Device >::computeSegmentedPrefixSum( *this, flags, 0, this->getSize() ); } template< typename Real, @@ -537,7 +537,7 @@ computeSegmentedPrefixSum( FlagsArray& flags, const IndexType begin, const IndexType end ) { - Algorithms::VectorOperations< Device >::computePrefixSum( *this, flags, begin, end ); + Algorithms::VectorOperations< Device >::computeSegmentedPrefixSum( *this, flags, begin, end ); } template< typename Real, @@ -548,7 +548,7 @@ void Vector< Real, Device, Index >:: computeSegmentedExclusivePrefixSum( FlagsArray& flags ) { - Algorithms::VectorOperations< Device >::computeExclusivePrefixSum( *this, flags, 0, this->getSize() ); + Algorithms::VectorOperations< Device >::computeSegmentedExclusivePrefixSum( *this, flags, 0, this->getSize() ); } template< typename Real, @@ -561,7 +561,7 @@ computeSegmentedExclusivePrefixSum( FlagsArray& flags, const IndexType begin, const IndexType end ) { - Algorithms::VectorOperations< Device >::computeExclusivePrefixSum( *this, flags, begin, end ); + Algorithms::VectorOperations< Device >::computeSegmentedExclusivePrefixSum( *this, flags, begin, end ); } } // namespace Containers diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index 5ed0b6899..bc468943f 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -86,8 +86,8 @@ public: static String getType(); - template< typename 
VectorOperationType > - void evaluate( const VectorOperationType& vo ); + //template< typename VectorOperationType > + //void evaluate( const VectorOperationType& vo ); template< typename VectorOperationType > void evaluateFor( const VectorOperationType& vo ); diff --git a/src/TNL/Containers/VectorView_impl.h b/src/TNL/Containers/VectorView_impl.h index f3dd2992d..23e79c125 100644 --- a/src/TNL/Containers/VectorView_impl.h +++ b/src/TNL/Containers/VectorView_impl.h @@ -437,7 +437,7 @@ void VectorView< Real, Device, Index >:: computeSegmentedPrefixSum( FlagsArray& flags ) { - Algorithms::VectorOperations< Device >::computePrefixSum( *this, flags, 0, this->getSize() ); + Algorithms::VectorOperations< Device >::computeSegmentedPrefixSum( *this, flags, 0, this->getSize() ); } template< typename Real, @@ -450,7 +450,7 @@ computeSegmentedPrefixSum( FlagsArray& flags, const IndexType begin, const IndexType end ) { - Algorithms::VectorOperations< Device >::computePrefixSum( *this, flags, begin, end ); + Algorithms::VectorOperations< Device >::computeSegmentedPrefixSum( *this, flags, begin, end ); } template< typename Real, @@ -461,7 +461,7 @@ void VectorView< Real, Device, Index >:: computeSegmentedExclusivePrefixSum( FlagsArray& flags ) { - Algorithms::VectorOperations< Device >::computeExclusivePrefixSum( *this, flags, 0, this->getSize() ); + Algorithms::VectorOperations< Device >::computeSegmentedExclusivePrefixSum( *this, flags, 0, this->getSize() ); } template< typename Real, @@ -474,7 +474,7 @@ computeSegmentedExclusivePrefixSum( FlagsArray& flags, const IndexType begin, const IndexType end ) { - Algorithms::VectorOperations< Device >::computeExclusivePrefixSum( *this, flags, begin, end ); + Algorithms::VectorOperations< Device >::computeSegmentedExclusivePrefixSum( *this, flags, begin, end ); } } // namespace Containers diff --git a/src/UnitTests/Containers/VectorTest-4.h b/src/UnitTests/Containers/VectorTest-4.h index 1918eb0f3..62095e054 100644 --- 
a/src/UnitTests/Containers/VectorTest-4.h +++ b/src/UnitTests/Containers/VectorTest-4.h @@ -206,7 +206,6 @@ TYPED_TEST( VectorTest, exclusivePrefixSum ) TYPED_TEST( VectorTest, segmentedPrefixSum ) { using VectorType = typename TestFixture::VectorType; - using VectorOperations = typename TestFixture::VectorOperations; using ViewType = typename TestFixture::ViewType; using RealType = typename VectorType::RealType; using DeviceType = typename VectorType::DeviceType; @@ -218,41 +217,55 @@ TYPED_TEST( VectorTest, segmentedPrefixSum ) VectorType v( size ); ViewType v_view( v ); - /*FlagsArrayType flags( size ), flags_copy( size ); + FlagsArrayType flags( size ), flags_copy( size ); FlagsViewType flags_view( flags ); flags_view.evaluate( [] __cuda_callable__ ( IndexType i ) { return ( i % 5 ) == 0; } ); flags_copy = flags_view; - v = 1; - v.computeSegmentedPrefixSum(); + v = 0; + v.computeSegmentedPrefixSum( flags_view ); for( int i = 0; i < size; i++ ) EXPECT_EQ( v.getElement( i ), 0 ); -*/ - - v.setValue( 0 ); - v.computePrefixSum(); + flags_view = flags_copy; + + v = 1; + v.computeSegmentedPrefixSum( flags_view ); for( int i = 0; i < size; i++ ) - EXPECT_EQ( v.getElement( i ), 0 ); + EXPECT_EQ( v.getElement( i ), ( i % 5 ) + 1 ); + flags_view = flags_copy; setLinearSequence( v ); - v.computePrefixSum(); + v.computeSegmentedPrefixSum( flags_view ); for( int i = 1; i < size; i++ ) - EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i ); - - setConstantSequence( v, 1 ); - v_view.computePrefixSum(); + { + if( flags.getElement( i ) ) + EXPECT_EQ( v.getElement( i ), i ); + else + EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i ); + } + flags_view = flags_copy; + + v_view = 0; + v_view.computeSegmentedPrefixSum( flags_view ); for( int i = 0; i < size; i++ ) - EXPECT_EQ( v.getElement( i ), i + 1 ); - - v.setValue( 0 ); - v_view.computePrefixSum(); + EXPECT_EQ( v_view.getElement( i ), 0 ); + flags_view = flags_copy; + + v_view = 1; + 
v_view.computeSegmentedPrefixSum( flags_view ); for( int i = 0; i < size; i++ ) - EXPECT_EQ( v.getElement( i ), 0 ); + EXPECT_EQ( v_view.getElement( i ), ( i % 5 ) + 1 ); + flags_view = flags_copy; - setLinearSequence( v ); - v_view.computePrefixSum(); + v_view.evaluate( [] __cuda_callable__ ( IndexType i ) { return i; } ); + v_view.computeSegmentedPrefixSum( flags_view ); for( int i = 1; i < size; i++ ) - EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i ); + { + if( flags.getElement( i ) ) + EXPECT_EQ( v_view.getElement( i ), i ); + else + EXPECT_EQ( v_view.getElement( i ) - v_view.getElement( i - 1 ), i ); + } } -- GitLab From 5a33f3e28524b6e887c73c2cfab9ad85316610d9 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Fri, 24 May 2019 06:57:32 +0200 Subject: [PATCH 48/93] Fixing prefix sum for CUDA. --- CMakeLists.txt | 2 +- src/UnitTests/Containers/VectorTest-4.h | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f179cc30a..9621c394b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,7 +82,7 @@ set( CMAKE_CXX_STANDARD_REQUIRED ON ) set( CMAKE_CXX_EXTENSIONS OFF ) # set Debug/Release options -set( CMAKE_CXX_FLAGS "-pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable" ) +set( CMAKE_CXX_FLAGS "-std=c++14 -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable" ) set( CMAKE_CXX_FLAGS_DEBUG "-g" ) set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG" ) #set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG -ftree-vectorizer-verbose=1 -ftree-vectorize -fopt-info-vec-missed -funroll-loops" ) diff --git a/src/UnitTests/Containers/VectorTest-4.h b/src/UnitTests/Containers/VectorTest-4.h index 62095e054..48a2c9796 100644 --- a/src/UnitTests/Containers/VectorTest-4.h +++ b/src/UnitTests/Containers/VectorTest-4.h @@ -203,6 +203,9 @@ TYPED_TEST( VectorTest, exclusivePrefixSum ) EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i - 1 ); } +template< 
typename IndexType > +auto f1 = [=] __cuda_callable__ ( IndexType i ) { return ( i % 5 ) == 0; }; + TYPED_TEST( VectorTest, segmentedPrefixSum ) { using VectorType = typename TestFixture::VectorType; @@ -219,7 +222,7 @@ TYPED_TEST( VectorTest, segmentedPrefixSum ) FlagsArrayType flags( size ), flags_copy( size ); FlagsViewType flags_view( flags ); - flags_view.evaluate( [] __cuda_callable__ ( IndexType i ) { return ( i % 5 ) == 0; } ); + flags_view.evaluate( f1 ); flags_copy = flags_view; v = 0; -- GitLab From 6040b3dada3362c5784c99ed95de8d1ef063024f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 26 May 2019 21:27:42 +0200 Subject: [PATCH 49/93] Fixind vector test. --- src/UnitTests/Containers/VectorTest-4.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/UnitTests/Containers/VectorTest-4.h b/src/UnitTests/Containers/VectorTest-4.h index 48a2c9796..0505a7399 100644 --- a/src/UnitTests/Containers/VectorTest-4.h +++ b/src/UnitTests/Containers/VectorTest-4.h @@ -203,8 +203,13 @@ TYPED_TEST( VectorTest, exclusivePrefixSum ) EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i - 1 ); } -template< typename IndexType > -auto f1 = [=] __cuda_callable__ ( IndexType i ) { return ( i % 5 ) == 0; }; + +template< typename FlagsView > +void setupFlags( FlagsView& f ) +{ + auto f1 = [] __cuda_callable__ ( typename FlagsView::IndexType i ) { return ( i % 5 ) == 0; }; + f.evaluate( f1 ); +} TYPED_TEST( VectorTest, segmentedPrefixSum ) { @@ -222,7 +227,9 @@ TYPED_TEST( VectorTest, segmentedPrefixSum ) FlagsArrayType flags( size ), flags_copy( size ); FlagsViewType flags_view( flags ); - flags_view.evaluate( f1 ); + //auto f1 = [] __cuda_callable__ ( IndexType i ) { return ( i % 5 ) == 0; }; + //flags_view.evaluate( f1 ); + setupFlags( flags_view ); flags_copy = flags_view; v = 0; @@ -260,7 +267,8 @@ TYPED_TEST( VectorTest, segmentedPrefixSum ) EXPECT_EQ( v_view.getElement( i ), ( i % 5 ) + 1 ); flags_view 
= flags_copy; - v_view.evaluate( [] __cuda_callable__ ( IndexType i ) { return i; } ); + //v_view.evaluate( [] __cuda_callable__ ( IndexType i ) { return i; } ); + setLinearSequence( v ); v_view.computeSegmentedPrefixSum( flags_view ); for( int i = 1; i < size; i++ ) { -- GitLab From c1c6fc8ee5b32971f6ccebddf1d0543afe45b2b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 4 Jun 2019 10:52:17 +0200 Subject: [PATCH 50/93] [WIP] Refactoring prefix sum in CUDA. --- .../Containers/Algorithms/VectorOperations.h | 4 +- .../Algorithms/VectorOperationsCuda_impl.h | 294 ------------------ .../Algorithms/cuda-prefix-sum_impl.h | 14 + 3 files changed, 16 insertions(+), 296 deletions(-) diff --git a/src/TNL/Containers/Algorithms/VectorOperations.h b/src/TNL/Containers/Algorithms/VectorOperations.h index e7e46a9f9..4c455f000 100644 --- a/src/TNL/Containers/Algorithms/VectorOperations.h +++ b/src/TNL/Containers/Algorithms/VectorOperations.h @@ -186,7 +186,7 @@ public: const Scalar2 multiplicator2, const Scalar3 thisMultiplicator = 1.0 ); - template< typename Vector > + /*template< typename Vector > static void computePrefixSum( Vector& v, const typename Vector::IndexType begin, const typename Vector::IndexType end ); @@ -194,7 +194,7 @@ public: template< typename Vector > static void computeExclusivePrefixSum( Vector& v, const typename Vector::IndexType begin, - const typename Vector::IndexType end ); + const typename Vector::IndexType end );*/ }; #endif diff --git a/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h b/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h index e8e6555cf..f62cfda07 100644 --- a/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h +++ b/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h @@ -39,283 +39,6 @@ addElement( Vector& v, v[ i ] = thisElementMultiplicator * v[ i ] + value; } -/*template< typename Vector, typename ResultType > -ResultType -VectorOperations< Devices::Cuda >:: 
-getVectorMax( const Vector& v ) -{ - typedef typename Vector::RealType RealType; - - TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); - - Algorithms::ParallelReductionMax< RealType, ResultType > operation; - return Reduction< Devices::Cuda >::reduce( operation, - v.getSize(), - v.getData(), - ( RealType* ) 0 ); -} - -template< typename Vector, typename ResultType > -ResultType -VectorOperations< Devices::Cuda >:: -getVectorMin( const Vector& v ) -{ - typedef typename Vector::RealType RealType; - - TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); - - Algorithms::ParallelReductionMin< RealType, ResultType > operation; - return Reduction< Devices::Cuda >::reduce( operation, - v.getSize(), - v.getData(), - ( RealType* ) 0 ); -} - -template< typename Vector, typename ResultType > -ResultType -VectorOperations< Devices::Cuda >:: -getVectorAbsMax( const Vector& v ) -{ - typedef typename Vector::RealType RealType; - - TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); - - Algorithms::ParallelReductionAbsMax< RealType, ResultType > operation; - return Reduction< Devices::Cuda >::reduce( operation, - v.getSize(), - v.getData(), - ( RealType* ) 0 ); -} - -template< typename Vector, typename ResultType > -ResultType -VectorOperations< Devices::Cuda >:: -getVectorAbsMin( const Vector& v ) -{ - typedef typename Vector::RealType RealType; - - TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); - - Algorithms::ParallelReductionAbsMin< RealType, ResultType > operation; - return Reduction< Devices::Cuda >::reduce( operation, - v.getSize(), - v.getData(), - ( RealType* ) 0 ); -} - -template< typename Vector, typename ResultType > -ResultType -VectorOperations< Devices::Cuda >:: -getVectorL1Norm( const Vector& v ) -{ - typedef typename Vector::RealType RealType; - - TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." 
); - - Algorithms::ParallelReductionAbsSum< RealType, ResultType > operation; - return Reduction< Devices::Cuda >::reduce( operation, - v.getSize(), - v.getData(), - ( RealType* ) 0 ); -} - -template< typename Vector, typename ResultType > -ResultType -VectorOperations< Devices::Cuda >:: -getVectorL2Norm( const Vector& v ) -{ - typedef typename Vector::RealType Real; - - TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); - - Algorithms::ParallelReductionL2Norm< Real, ResultType > operation; - const ResultType result = Reduction< Devices::Cuda >::reduce( operation, - v.getSize(), - v.getData(), - ( Real* ) 0 ); - return std::sqrt( result ); -} - -template< typename Vector, typename ResultType, typename Scalar > -ResultType -VectorOperations< Devices::Cuda >:: -getVectorLpNorm( const Vector& v, - const Scalar p ) -{ - typedef typename Vector::RealType Real; - - TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); - TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." ); - - if( p == 1 ) - return getVectorL1Norm< Vector, ResultType >( v ); - if( p == 2 ) - return getVectorL2Norm< Vector, ResultType >( v ); - - Algorithms::ParallelReductionLpNorm< Real, ResultType, Scalar > operation; - operation.setPower( p ); - const ResultType result = Reduction< Devices::Cuda >::reduce( operation, - v.getSize(), - v.getData(), - ( Real* ) 0 ); - return std::pow( result, 1.0 / p ); -} - -template< typename Vector, typename ResultType > -ResultType -VectorOperations< Devices::Cuda >:: -getVectorSum( const Vector& v ) -{ - typedef typename Vector::RealType Real; - - TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." 
); - - Algorithms::ParallelReductionSum< Real, ResultType > operation; - return Reduction< Devices::Cuda >::reduce( operation, - v.getSize(), - v.getData(), - ( Real* ) 0 ); -} - -template< typename Vector1, typename Vector2, typename ResultType > -ResultType -VectorOperations< Devices::Cuda >:: -getVectorDifferenceMax( const Vector1& v1, - const Vector2& v2 ) -{ - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); - TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); - - Algorithms::ParallelReductionDiffMax< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; - return Reduction< Devices::Cuda >::reduce( operation, - v1.getSize(), - v1.getData(), - v2.getData() ); -} - -template< typename Vector1, typename Vector2, typename ResultType > -ResultType -VectorOperations< Devices::Cuda >:: -getVectorDifferenceMin( const Vector1& v1, - const Vector2& v2 ) -{ - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); - TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); - - Algorithms::ParallelReductionDiffMin< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; - return Reduction< Devices::Cuda >::reduce( operation, - v1.getSize(), - v1.getData(), - v2.getData() ); -} - - -template< typename Vector1, typename Vector2, typename ResultType > -ResultType -VectorOperations< Devices::Cuda >:: -getVectorDifferenceAbsMax( const Vector1& v1, - const Vector2& v2 ) -{ - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); - TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." 
); - - Algorithms::ParallelReductionDiffAbsMax< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; - return Reduction< Devices::Cuda >::reduce( operation, - v1.getSize(), - v1.getData(), - v2.getData() ); -} - -template< typename Vector1, typename Vector2, typename ResultType > -ResultType -VectorOperations< Devices::Cuda >:: -getVectorDifferenceAbsMin( const Vector1& v1, - const Vector2& v2 ) -{ - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); - TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); - - Algorithms::ParallelReductionDiffAbsMin< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; - return Reduction< Devices::Cuda >::reduce( operation, - v1.getSize(), - v1.getData(), - v2.getData() ); -} - -template< typename Vector1, typename Vector2, typename ResultType > -ResultType -VectorOperations< Devices::Cuda >:: -getVectorDifferenceL1Norm( const Vector1& v1, - const Vector2& v2 ) -{ - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); - TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); - - Algorithms::ParallelReductionDiffAbsSum< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; - return Reduction< Devices::Cuda >::reduce( operation, - v1.getSize(), - v1.getData(), - v2.getData() ); -} - -template< typename Vector1, typename Vector2, typename ResultType > -ResultType -VectorOperations< Devices::Cuda >:: -getVectorDifferenceL2Norm( const Vector1& v1, - const Vector2& v2 ) -{ - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); - TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." 
); - - Algorithms::ParallelReductionDiffL2Norm< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; - const ResultType result = Reduction< Devices::Cuda >::reduce( operation, - v1.getSize(), - v1.getData(), - v2.getData() ); - return std::sqrt( result ); -} - -template< typename Vector1, typename Vector2, typename ResultType, typename Scalar > -ResultType -VectorOperations< Devices::Cuda >:: -getVectorDifferenceLpNorm( const Vector1& v1, - const Vector2& v2, - const Scalar p ) -{ - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); - TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); - TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." ); - - if( p == 1.0 ) - return getVectorDifferenceL1Norm< Vector1, Vector2, ResultType >( v1, v2 ); - if( p == 2.0 ) - return getVectorDifferenceL2Norm< Vector1, Vector2, ResultType >( v1, v2 ); - - Algorithms::ParallelReductionDiffLpNorm< typename Vector1::RealType, typename Vector2::RealType, ResultType, Scalar > operation; - operation.setPower( p ); - const ResultType result = Reduction< Devices::Cuda >::reduce( operation, - v1.getSize(), - v1.getData(), - v2.getData() ); - return std::pow( result, 1.0 / p ); -} - -template< typename Vector1, typename Vector2, typename ResultType > -ResultType -VectorOperations< Devices::Cuda >:: -getVectorDifferenceSum( const Vector1& v1, - const Vector2& v2 ) -{ - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); - TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." 
); - - Algorithms::ParallelReductionDiffSum< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; - return Reduction< Devices::Cuda >::reduce( operation, - v1.getSize(), - v1.getData(), - v2.getData() ); -}*/ - #ifdef HAVE_CUDA template< typename Real, typename Index, typename Scalar > __global__ void @@ -357,23 +80,6 @@ vectorScalarMultiplication( Vector& v, #endif } - -/*template< typename Vector1, typename Vector2, typename ResultType > -ResultType -VectorOperations< Devices::Cuda >:: -getScalarProduct( const Vector1& v1, - const Vector2& v2 ) -{ - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); - TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); - - Algorithms::ParallelReductionScalarProduct< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; - return Reduction< Devices::Cuda >::reduce( operation, - v1.getSize(), - v1.getData(), - v2.getData() ); -}*/ - #ifdef HAVE_CUDA template< typename Real1, typename Real2, typename Index, typename Scalar1, typename Scalar2 > __global__ void diff --git a/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h b/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h index 9687a7e2a..614d33dc0 100644 --- a/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h +++ b/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h @@ -24,6 +24,20 @@ namespace TNL { namespace Containers { namespace Algorithms { +/* +template< typename Vector, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation > +void +PrefixSum< Devices::Host >:: +inclusive( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const typename Vector::RealType& zero ) +*/ + template< typename DataType, typename Operation, typename Index > -- GitLab From 12a7c39c8f77d46ecc7a6d058b9382cd1530bcc1 Mon Sep 17 00:00:00 2001 From: Tomas 
Oberhuber Date: Tue, 4 Jun 2019 15:52:45 +0200 Subject: [PATCH 51/93] Refactoring CUDA prefix sum. --- src/TNL/Containers/Algorithms/PrefixSum.hpp | 29 ++++++-- .../Containers/Algorithms/VectorOperations.h | 4 +- .../Algorithms/VectorOperationsCuda_impl.h | 6 +- .../Containers/Algorithms/cuda-prefix-sum.h | 9 ++- .../Algorithms/cuda-prefix-sum_impl.h | 69 ++++++++++--------- 5 files changed, 75 insertions(+), 42 deletions(-) diff --git a/src/TNL/Containers/Algorithms/PrefixSum.hpp b/src/TNL/Containers/Algorithms/PrefixSum.hpp index 65731ca49..29ec072c0 100644 --- a/src/TNL/Containers/Algorithms/PrefixSum.hpp +++ b/src/TNL/Containers/Algorithms/PrefixSum.hpp @@ -20,6 +20,7 @@ #include #include #include +#include #ifdef CUDA_REDUCTION_PROFILING #include @@ -99,7 +100,7 @@ inclusiveSegmented( Vector& v, const typename Vector::RealType& zero ) { using IndexType = typename Vector::IndexType; - + // TODO: parallelize with OpenMP for( IndexType i = begin + 1; i < end; i++ ) if( ! f[ i ] ) @@ -147,11 +148,20 @@ inclusive( Vector& v, const typename Vector::IndexType begin, const typename Vector::IndexType end, PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, + VolatilePrefixSumOperation& volatileReduction, const typename Vector::RealType& zero ) { + using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; - + using IndexType = typename Vector::IndexType; + cudaPrefixSum( ( IndexType ) ( end - begin ), + ( IndexType ) 256, + &v[ begin ], + &v[ begin ], + reduction, + volatileReduction, + zero, + Algorithms::PrefixSumType::inclusive ); } template< typename Vector, @@ -163,11 +173,22 @@ exclusive( Vector& v, const typename Vector::IndexType begin, const typename Vector::IndexType end, PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, + VolatilePrefixSumOperation& volatileReduction, const typename Vector::RealType& zero ) { using IndexType = typename Vector::IndexType; using RealType = 
typename Vector::RealType; + using RealType = typename Vector::RealType; + using IndexType = typename Vector::IndexType; + using IndexType = typename Vector::IndexType; + cudaPrefixSum( ( IndexType ) ( end - begin ), + ( IndexType ) 256, + &v[ begin ], + &v[ begin ], + reduction, + volatileReduction, + zero, + Algorithms::PrefixSumType::exclusive ); } diff --git a/src/TNL/Containers/Algorithms/VectorOperations.h b/src/TNL/Containers/Algorithms/VectorOperations.h index 4c455f000..f940b3ac1 100644 --- a/src/TNL/Containers/Algorithms/VectorOperations.h +++ b/src/TNL/Containers/Algorithms/VectorOperations.h @@ -91,7 +91,7 @@ public: const Scalar2 multiplicator2, const Scalar3 thisMultiplicator = 1.0 ); - template< typename Vector > + /*template< typename Vector > static void computePrefixSum( Vector& v, const typename Vector::IndexType begin, const typename Vector::IndexType end ); @@ -99,7 +99,7 @@ public: template< typename Vector > static void computeExclusivePrefixSum( Vector& v, const typename Vector::IndexType begin, - const typename Vector::IndexType end ); + const typename Vector::IndexType end );*/ }; #ifdef HAVE_MIC diff --git a/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h b/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h index f62cfda07..e428148bc 100644 --- a/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h +++ b/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h @@ -213,7 +213,7 @@ addVectors( Vector1& v, #endif } -template< typename Vector > +/*template< typename Vector > void VectorOperations< Devices::Cuda >:: computePrefixSum( Vector& v, @@ -232,6 +232,7 @@ computePrefixSum( Vector& v, &v.getData()[ begin ], &v.getData()[ begin ], operation, + operation.initialValue(), Algorithms::PrefixSumType::inclusive ); #else throw Exceptions::CudaSupportMissing(); @@ -257,9 +258,10 @@ computeExclusivePrefixSum( Vector& v, &v.getData()[ begin ], &v.getData()[ begin ], operation, + operation.initialValue(), 
Algorithms::PrefixSumType::exclusive ); #endif -} +}*/ } // namespace Algorithms } // namespace Containers diff --git a/src/TNL/Containers/Algorithms/cuda-prefix-sum.h b/src/TNL/Containers/Algorithms/cuda-prefix-sum.h index 37215a995..d24de8cb5 100644 --- a/src/TNL/Containers/Algorithms/cuda-prefix-sum.h +++ b/src/TNL/Containers/Algorithms/cuda-prefix-sum.h @@ -13,7 +13,7 @@ namespace TNL { namespace Containers { namespace Algorithms { - + enum class PrefixSumType { exclusive, @@ -22,12 +22,15 @@ enum class PrefixSumType template< typename DataType, typename Operation, + typename VolatileOperation, typename Index > -bool cudaPrefixSum( const Index size, +void cudaPrefixSum( const Index size, const Index blockSize, const DataType *deviceInput, DataType* deviceOutput, - const Operation& operation, + Operation& operation, + VolatileOperation& volatileOperation, + const DataType& zero, const PrefixSumType prefixSumType = PrefixSumType::inclusive ); } // namespace Algorithms diff --git a/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h b/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h index 614d33dc0..d8a2a0a45 100644 --- a/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h +++ b/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h @@ -22,28 +22,17 @@ namespace TNL { namespace Containers { -namespace Algorithms { - -/* -template< typename Vector, - typename PrefixSumOperation, - typename VolatilePrefixSumOperation > -void -PrefixSum< Devices::Host >:: -inclusive( Vector& v, - const typename Vector::IndexType begin, - const typename Vector::IndexType end, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, - const typename Vector::RealType& zero ) -*/ +namespace Algorithms { template< typename DataType, typename Operation, + typename VolatileOperation, typename Index > __global__ void cudaFirstPhaseBlockPrefixSum( const PrefixSumType prefixSumType, Operation operation, + VolatileOperation volatileOperation, + const DataType zero, 
const Index size, const Index elementsInBlock, const DataType* input, @@ -65,7 +54,7 @@ cudaFirstPhaseBlockPrefixSum( const PrefixSumType prefixSumType, if( prefixSumType == PrefixSumType::exclusive ) { if( idx == 0 ) - sharedData[ 0 ] = operation.initialValue(); + sharedData[ 0 ] = zero; //operation.initialValue(); while( idx < elementsInBlock && blockOffset + idx < size ) { sharedData[ Devices::Cuda::getInterleaving( idx + 1 ) ] = input[ blockOffset + idx ]; @@ -97,8 +86,8 @@ cudaFirstPhaseBlockPrefixSum( const PrefixSumType prefixSumType, while( chunkPointer < chunkSize && chunkOffset + chunkPointer < lastElementInBlock ) { - operation.commonReduction( sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ], - sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer - 1 ) ] ); + operation( sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ], + sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer - 1 ) ] ); auxData[ threadIdx.x ] = sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ]; chunkPointer++; @@ -111,7 +100,7 @@ cudaFirstPhaseBlockPrefixSum( const PrefixSumType prefixSumType, const int warpIdx = threadIdx.x / Devices::Cuda::getWarpSize(); for( int stride = 1; stride < Devices::Cuda::getWarpSize(); stride *= 2 ) if( threadInWarpIdx >= stride && threadIdx.x < numberOfChunks ) - operation.commonReduction( auxData[ threadIdx.x ], auxData[ threadIdx.x - stride ] ); + volatileOperation( auxData[ threadIdx.x ], auxData[ threadIdx.x - stride ] ); if( threadInWarpIdx == Devices::Cuda::getWarpSize() - 1 ) warpSums[ warpIdx ] = auxData[ threadIdx.x ]; @@ -123,14 +112,14 @@ cudaFirstPhaseBlockPrefixSum( const PrefixSumType prefixSumType, if( warpIdx == 0 ) for( int stride = 1; stride < Devices::Cuda::getWarpSize(); stride *= 2 ) if( threadInWarpIdx >= stride ) - operation.commonReduction( warpSums[ threadIdx.x ], warpSums[ threadIdx.x - stride ] ); + volatileOperation( 
warpSums[ threadIdx.x ], warpSums[ threadIdx.x - stride ] ); __syncthreads(); /**** * Shift the warp prefix-sums. */ if( warpIdx > 0 ) - operation.commonReduction( auxData[ threadIdx.x ], warpSums[ warpIdx - 1 ] ); + volatileOperation( auxData[ threadIdx.x ], warpSums[ warpIdx - 1 ] ); /*** * Store the result back in global memory. @@ -140,10 +129,10 @@ cudaFirstPhaseBlockPrefixSum( const PrefixSumType prefixSumType, while( idx < elementsInBlock && blockOffset + idx < size ) { const Index chunkIdx = idx / chunkSize; - DataType chunkShift( operation.initialValue() ); + DataType chunkShift( zero ); //operation.initialValue() ); if( chunkIdx > 0 ) chunkShift = auxData[ chunkIdx - 1 ]; - operation.commonReduction( sharedData[ Devices::Cuda::getInterleaving( idx ) ], chunkShift ); + operation( sharedData[ Devices::Cuda::getInterleaving( idx ) ], chunkShift ); output[ blockOffset + idx ] = sharedData[ Devices::Cuda::getInterleaving( idx ) ]; idx += blockDim.x; } @@ -156,9 +145,9 @@ cudaFirstPhaseBlockPrefixSum( const PrefixSumType prefixSumType, /*auxArray[ blockIdx.x ] = operation.commonReduction( Devices::Cuda::getInterleaving( lastElementInBlock - 1 ), Devices::Cuda::getInterleaving( lastElementInBlock ), sharedData );*/ - DataType aux = operation.initialValue(); - operation.commonReduction( aux, sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ] ); - operation.commonReduction( aux, sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock ) ] ); + DataType aux = zero; //operation.initialValue(); + operation( aux, sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ] ); + operation( aux, sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock ) ] ); auxArray[ blockIdx.x ] = aux; } else @@ -179,13 +168,13 @@ cudaSecondPhaseBlockPrefixSum( Operation operation, { if( blockIdx.x > 0 ) { - operation.commonReduction( gridShift, auxArray[ blockIdx.x - 1 ] ); + operation( gridShift, auxArray[ blockIdx.x - 1 ] ); const Index 
readOffset = blockIdx.x * elementsInBlock; Index readIdx = threadIdx.x; while( readIdx < elementsInBlock && readOffset + readIdx < size ) { - operation.commonReduction( data[ readIdx + readOffset ], gridShift ); + operation( data[ readIdx + readOffset ], gridShift ); readIdx += blockDim.x; } } @@ -194,10 +183,13 @@ cudaSecondPhaseBlockPrefixSum( Operation operation, template< typename DataType, typename Operation, + typename VolatileOperation, typename Index > void cudaRecursivePrefixSum( const PrefixSumType prefixSumType, Operation& operation, + VolatileOperation& volatileOperation, + const DataType& zero, const Index size, const Index blockSize, const Index elementsInBlock, @@ -228,6 +220,8 @@ cudaRecursivePrefixSum( const PrefixSumType prefixSumType, cudaFirstPhaseBlockPrefixSum<<< cudaGridSize, cudaBlockSize, sharedMemory >>> ( prefixSumType, operation, + volatileOperation, + zero, size, elementsInBlock, input, @@ -243,10 +237,12 @@ cudaRecursivePrefixSum( const PrefixSumType prefixSumType, if( numberOfBlocks > 1 ) cudaRecursivePrefixSum( PrefixSumType::inclusive, operation, + volatileOperation, + zero, numberOfBlocks, blockSize, elementsInBlock, - operation.initialValue(), + gridShift, auxArray1.getData(), auxArray2.getData() ); @@ -261,13 +257,15 @@ cudaRecursivePrefixSum( const PrefixSumType prefixSumType, } - template< typename DataType, typename Operation, + typename VolatileOperation, typename Index > void cudaGridPrefixSum( PrefixSumType prefixSumType, Operation& operation, + VolatileOperation& volatileOperation, + const DataType& zero, const Index size, const Index blockSize, const Index elementsInBlock, @@ -277,6 +275,8 @@ cudaGridPrefixSum( PrefixSumType prefixSumType, { cudaRecursivePrefixSum( prefixSumType, operation, + volatileOperation, + zero, size, blockSize, elementsInBlock, @@ -291,8 +291,11 @@ cudaGridPrefixSum( PrefixSumType prefixSumType, TNL_CHECK_CUDA_DEVICE; } +///// +// deviceInput and deviceOutput can be the same template< typename 
DataType, typename Operation, + typename VolatileOperation, typename Index > void cudaPrefixSum( const Index size, @@ -300,6 +303,8 @@ cudaPrefixSum( const Index size, const DataType *deviceInput, DataType* deviceOutput, Operation& operation, + VolatileOperation& volatileOperation, + const DataType& zero, const PrefixSumType prefixSumType ) { /**** @@ -313,7 +318,7 @@ cudaPrefixSum( const Index size, /**** * Loop over all grids. */ - DataType gridShift = operation.initialValue(); + DataType gridShift = zero; //operation.initialValue(); for( Index gridIdx = 0; gridIdx < numberOfGrids; gridIdx++ ) { /**** @@ -326,6 +331,8 @@ cudaPrefixSum( const Index size, cudaGridPrefixSum( prefixSumType, operation, + volatileOperation, + zero, currentSize, blockSize, elementsInBlock, -- GitLab From 2cee804b3d6d22a25b2d10908a00a7fbcfdf83ff Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 5 Jun 2019 17:52:59 +0200 Subject: [PATCH 52/93] Fixed CUDA prefix sum. --- src/TNL/Containers/Algorithms/PrefixSum.hpp | 5 +- .../Containers/Algorithms/VectorAssignment.h | 297 ++++++++++++++- .../Containers/Algorithms/cuda-prefix-sum.h | 353 +++++++++++++++++- .../Algorithms/cuda-prefix-sum_impl.h | 349 ----------------- src/TNL/Containers/VectorView.h | 16 +- src/TNL/Containers/VectorView_impl.h | 28 +- src/UnitTests/Containers/VectorTest-4.h | 65 +++- src/UnitTests/Containers/VectorTestSetup.h | 21 ++ 8 files changed, 748 insertions(+), 386 deletions(-) delete mode 100644 src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h diff --git a/src/TNL/Containers/Algorithms/PrefixSum.hpp b/src/TNL/Containers/Algorithms/PrefixSum.hpp index 29ec072c0..30a39852b 100644 --- a/src/TNL/Containers/Algorithms/PrefixSum.hpp +++ b/src/TNL/Containers/Algorithms/PrefixSum.hpp @@ -154,6 +154,7 @@ inclusive( Vector& v, using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; using IndexType = typename Vector::IndexType; +#ifdef HAVE_CUDA cudaPrefixSum( ( IndexType ) ( end - 
begin ), ( IndexType ) 256, &v[ begin ], @@ -162,6 +163,7 @@ inclusive( Vector& v, volatileReduction, zero, Algorithms::PrefixSumType::inclusive ); +#endif } template< typename Vector, @@ -181,6 +183,7 @@ exclusive( Vector& v, using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; using IndexType = typename Vector::IndexType; +#ifdef HAVE_CUDA cudaPrefixSum( ( IndexType ) ( end - begin ), ( IndexType ) 256, &v[ begin ], @@ -189,7 +192,7 @@ exclusive( Vector& v, volatileReduction, zero, Algorithms::PrefixSumType::exclusive ); - +#endif } template< typename Vector, diff --git a/src/TNL/Containers/Algorithms/VectorAssignment.h b/src/TNL/Containers/Algorithms/VectorAssignment.h index 9a01fa1a8..222aa64ae 100644 --- a/src/TNL/Containers/Algorithms/VectorAssignment.h +++ b/src/TNL/Containers/Algorithms/VectorAssignment.h @@ -13,6 +13,7 @@ #include #include #include +#include namespace TNL { namespace Containers { @@ -38,13 +39,48 @@ public: }; } // namespace Details +/** + * \brief Vector assignment + */ template< typename Vector, typename T, bool hasSubscriptOperator = Details::HasSubscriptOperator< T >::value > struct VectorAssignment{}; /** - * \brief Specialization for assignment with subscript operator + * \brief Vector addition + */ +template< typename Vector, + typename T, + bool hasSubscriptOperator = Details::HasSubscriptOperator< T >::value > +struct VectorAddition{}; + +/** + * \brief Vector subtraction + */ +template< typename Vector, + typename T, + bool hasSubscriptOperator = Details::HasSubscriptOperator< T >::value > +struct VectorSubtraction{}; + +/** + * \brief Vector multiplication + */ +template< typename Vector, + typename T, + bool hasSubscriptOperator = Details::HasSubscriptOperator< T >::value > +struct VectorMultiplication{}; + +/** + * \brief Vector division + */ +template< typename Vector, + typename T, + bool hasSubscriptOperator = Details::HasSubscriptOperator< T >::value > +struct VectorDivision{}; + +/** + 
* \brief Specialization of ASSIGNMENT with subscript operator */ template< typename Vector, typename T > @@ -78,11 +114,10 @@ struct VectorAssignment< Vector, T, true > ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), ass ); TNL_CHECK_CUDA_DEVICE; }; - }; /** - * \brief Specialization for array-value assignment for other types. We assume + * \brief Specialization of ASSIGNMENT for array-value assignment for other types. We assume * that T is convertible to Vector::ValueType. */ template< typename Vector, @@ -115,7 +150,263 @@ struct VectorAssignment< Vector, T, false > ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), ass ); TNL_CHECK_CUDA_DEVICE; } +}; + +/** + * \brief Specialization of ADDITION with subscript operator + */ +template< typename Vector, + typename T > +struct VectorAddition< Vector, T, true > +{ + __cuda_callable__ + static void additionStatic( Vector& v, const T& t ) + { + TNL_ASSERT_EQ( v.getSize(), t.getSize(), "The sizes of the vectors must be equal." ); + for( decltype( v.getSize() ) i = 0; i < v.getSize(); i ++ ) + v[ i ] += t[ i ]; + }; + + static void addition( Vector& v, const T& t ) + { + TNL_ASSERT_EQ( v.getSize(), t.getSize(), "The sizes of the vectors must be equal." ); + using RealType = typename Vector::RealType; + using DeviceType = typename Vector::DeviceType; + using IndexType = typename Vector::IndexType; + + RealType* data = v.getData(); + auto add = [=] __cuda_callable__ ( IndexType i ) + { + data[ i ] += t[ i ]; + }; + ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), add ); + TNL_CHECK_CUDA_DEVICE; + }; +}; + +/** + * \brief Specialization of ADDITION for array-value assignment for other types. We assume + * that T is convertible to Vector::ValueType. 
+ */ +template< typename Vector, + typename T > +struct VectorAddition< Vector, T, false > +{ + __cuda_callable__ + static void additionStatic( Vector& v, const T& t ) + { + TNL_ASSERT_GT( v.getSize(), 0, "Cannot assign value to empty vector." ); + for( decltype( v.getSize() ) i = 0; i < v.getSize(); i ++ ) + v[ i ] += t; + }; + + static void addition( Vector& v, const T& t ) + { + using RealType = typename Vector::RealType; + using DeviceType = typename Vector::DeviceType; + using IndexType = typename Vector::IndexType; + + RealType* data = v.getData(); + auto add = [=] __cuda_callable__ ( IndexType i ) + { + data[ i ] += t; + }; + ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), add ); + TNL_CHECK_CUDA_DEVICE; + } +}; +/** + * \brief Specialization of SUBTRACTION with subscript operator + */ +template< typename Vector, + typename T > +struct VectorSubtraction< Vector, T, true > +{ + __cuda_callable__ + static void subtractionStatic( Vector& v, const T& t ) + { + TNL_ASSERT_EQ( v.getSize(), t.getSize(), "The sizes of the vectors must be equal." ); + for( decltype( v.getSize() ) i = 0; i < v.getSize(); i ++ ) + v[ i ] -= t[ i ]; + }; + + static void subtraction( Vector& v, const T& t ) + { + TNL_ASSERT_EQ( v.getSize(), t.getSize(), "The sizes of the vectors must be equal." ); + using RealType = typename Vector::RealType; + using DeviceType = typename Vector::DeviceType; + using IndexType = typename Vector::IndexType; + + RealType* data = v.getData(); + auto subtract = [=] __cuda_callable__ ( IndexType i ) + { + data[ i ] -= t[ i ]; + }; + ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), subtract ); + TNL_CHECK_CUDA_DEVICE; + }; +}; + +/** + * \brief Specialization of SUBTRACTION for array-value assignment for other types. We assume + * that T is convertible to Vector::ValueType. 
+ */ +template< typename Vector, + typename T > +struct VectorSubtraction< Vector, T, false > +{ + __cuda_callable__ + static void subtractionStatic( Vector& v, const T& t ) + { + TNL_ASSERT_GT( v.getSize(), 0, "Cannot assign value to empty vector." ); + for( decltype( v.getSize() ) i = 0; i < v.getSize(); i ++ ) + v[ i ] -= t; + }; + + static void subtraction( Vector& v, const T& t ) + { + using RealType = typename Vector::RealType; + using DeviceType = typename Vector::DeviceType; + using IndexType = typename Vector::IndexType; + + RealType* data = v.getData(); + auto subtract = [=] __cuda_callable__ ( IndexType i ) + { + data[ i ] -= t; + }; + ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), subtract ); + TNL_CHECK_CUDA_DEVICE; + } +}; + +/** + * \brief Specialization of MULTIPLICATION with subscript operator + */ +template< typename Vector, + typename T > +struct VectorMultiplication< Vector, T, true > +{ + __cuda_callable__ + static void multiplicationStatic( Vector& v, const T& t ) + { + TNL_ASSERT_EQ( v.getSize(), t.getSize(), "The sizes of the vectors must be equal." ); + for( decltype( v.getSize() ) i = 0; i < v.getSize(); i ++ ) + v[ i ] *= t[ i ]; + }; + + static void multiplication( Vector& v, const T& t ) + { + TNL_ASSERT_EQ( v.getSize(), t.getSize(), "The sizes of the vectors must be equal." ); + using RealType = typename Vector::RealType; + using DeviceType = typename Vector::DeviceType; + using IndexType = typename Vector::IndexType; + + RealType* data = v.getData(); + auto multiply = [=] __cuda_callable__ ( IndexType i ) + { + data[ i ] *= t[ i ]; + }; + ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), multiply ); + TNL_CHECK_CUDA_DEVICE; + }; +}; + +/** + * \brief Specialization of MULTIPLICATION for array-value assignment for other types. We assume + * that T is convertible to Vector::ValueType. 
+ */ +template< typename Vector, + typename T > +struct VectorMultiplication< Vector, T, false > +{ + __cuda_callable__ + static void multiplicationStatic( Vector& v, const T& t ) + { + TNL_ASSERT_GT( v.getSize(), 0, "Cannot assign value to empty vector." ); + for( decltype( v.getSize() ) i = 0; i < v.getSize(); i ++ ) + v[ i ] *= t; + }; + + static void multiplication( Vector& v, const T& t ) + { + using RealType = typename Vector::RealType; + using DeviceType = typename Vector::DeviceType; + using IndexType = typename Vector::IndexType; + + RealType* data = v.getData(); + auto multiply = [=] __cuda_callable__ ( IndexType i ) + { + data[ i ] *= t; + }; + ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), multiply ); + TNL_CHECK_CUDA_DEVICE; + } +}; + + +/** + * \brief Specialization of DIVISION with subscript operator + */ +template< typename Vector, + typename T > +struct VectorDivision< Vector, T, true > +{ + __cuda_callable__ + static void divisionStatic( Vector& v, const T& t ) + { + TNL_ASSERT_EQ( v.getSize(), t.getSize(), "The sizes of the vectors must be equal." ); + for( decltype( v.getSize() ) i = 0; i < v.getSize(); i ++ ) + v[ i ] /= t[ i ]; + }; + + static void division( Vector& v, const T& t ) + { + TNL_ASSERT_EQ( v.getSize(), t.getSize(), "The sizes of the vectors must be equal." ); + using RealType = typename Vector::RealType; + using DeviceType = typename Vector::DeviceType; + using IndexType = typename Vector::IndexType; + + RealType* data = v.getData(); + auto divide = [=] __cuda_callable__ ( IndexType i ) + { + data[ i ] /= t[ i ]; + }; + ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), divide ); + TNL_CHECK_CUDA_DEVICE; + }; +}; + +/** + * \brief Specialization of DIVISION for array-value assignment for other types. We assume + * that T is convertible to Vector::ValueType. 
+ */ +template< typename Vector, + typename T > +struct VectorDivision< Vector, T, false > +{ + __cuda_callable__ + static void divisionStatic( Vector& v, const T& t ) + { + TNL_ASSERT_GT( v.getSize(), 0, "Cannot assign value to empty vector." ); + for( decltype( v.getSize() ) i = 0; i < v.getSize(); i ++ ) + v[ i ] /= t; + }; + + static void division( Vector& v, const T& t ) + { + using RealType = typename Vector::RealType; + using DeviceType = typename Vector::DeviceType; + using IndexType = typename Vector::IndexType; + + RealType* data = v.getData(); + auto divide = [=] __cuda_callable__ ( IndexType i ) + { + data[ i ] /= t; + }; + ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), divide ); + TNL_CHECK_CUDA_DEVICE; + } }; } // namespace Algorithms diff --git a/src/TNL/Containers/Algorithms/cuda-prefix-sum.h b/src/TNL/Containers/Algorithms/cuda-prefix-sum.h index d24de8cb5..4019423a9 100644 --- a/src/TNL/Containers/Algorithms/cuda-prefix-sum.h +++ b/src/TNL/Containers/Algorithms/cuda-prefix-sum.h @@ -10,6 +10,14 @@ #pragma once +#include + +#include +#include +#include +#include +#include + namespace TNL { namespace Containers { namespace Algorithms { @@ -20,21 +28,348 @@ enum class PrefixSumType inclusive }; +#ifdef HAVE_CUDA + template< typename DataType, typename Operation, typename VolatileOperation, typename Index > -void cudaPrefixSum( const Index size, - const Index blockSize, - const DataType *deviceInput, - DataType* deviceOutput, - Operation& operation, - VolatileOperation& volatileOperation, - const DataType& zero, - const PrefixSumType prefixSumType = PrefixSumType::inclusive ); +__global__ void +cudaFirstPhaseBlockPrefixSum( const PrefixSumType prefixSumType, + Operation operation, + VolatileOperation volatileOperation, + const DataType zero, + const Index size, + const Index elementsInBlock, + const DataType* input, + DataType* output, + DataType* auxArray ) +{ + DataType* sharedData = TNL::Devices::Cuda::getSharedMemory< DataType >(); 
+ volatile DataType* auxData = &sharedData[ elementsInBlock + elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2 ]; + volatile DataType* warpSums = &auxData[ blockDim.x ]; + + const Index lastElementIdx = size - blockIdx.x * elementsInBlock; + const Index lastElementInBlock = TNL::min( lastElementIdx, elementsInBlock ); + + /*** + * Load data into the shared memory. + */ + const Index blockOffset = blockIdx.x * elementsInBlock; + Index idx = threadIdx.x; + if( prefixSumType == PrefixSumType::exclusive ) + { + if( idx == 0 ) + sharedData[ 0 ] = zero; + while( idx < elementsInBlock && blockOffset + idx < size ) + { + sharedData[ Devices::Cuda::getInterleaving( idx + 1 ) ] = input[ blockOffset + idx ]; + idx += blockDim.x; + } + } + else + while( idx < elementsInBlock && blockOffset + idx < size ) + { + sharedData[ Devices::Cuda::getInterleaving( idx ) ] = input[ blockOffset + idx ]; + idx += blockDim.x; + } + + /*** + * Perform the sequential prefix-sum. + */ + __syncthreads(); + const int chunkSize = elementsInBlock / blockDim.x; + const int chunkOffset = threadIdx.x * chunkSize; + const int numberOfChunks = roundUpDivision( lastElementInBlock, chunkSize ); + + if( chunkOffset < lastElementInBlock ) + { + auxData[ threadIdx.x ] = + sharedData[ Devices::Cuda::getInterleaving( chunkOffset ) ]; + } + + Index chunkPointer( 1 ); + while( chunkPointer < chunkSize && + chunkOffset + chunkPointer < lastElementInBlock ) + { + operation( sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ], + sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer - 1 ) ] ); + auxData[ threadIdx.x ] = + sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ]; + chunkPointer++; + } + + /*** + * Perform the parallel prefix-sum inside warps. 
+ */ + const int threadInWarpIdx = threadIdx.x % Devices::Cuda::getWarpSize(); + const int warpIdx = threadIdx.x / Devices::Cuda::getWarpSize(); + for( int stride = 1; stride < Devices::Cuda::getWarpSize(); stride *= 2 ) + if( threadInWarpIdx >= stride && threadIdx.x < numberOfChunks ) + volatileOperation( auxData[ threadIdx.x ], auxData[ threadIdx.x - stride ] ); + + if( threadInWarpIdx == Devices::Cuda::getWarpSize() - 1 ) + warpSums[ warpIdx ] = auxData[ threadIdx.x ]; + __syncthreads(); + + /**** + * Compute prefix-sum of warp sums using one warp + */ + if( warpIdx == 0 ) + for( int stride = 1; stride < Devices::Cuda::getWarpSize(); stride *= 2 ) + if( threadInWarpIdx >= stride ) + volatileOperation( warpSums[ threadIdx.x ], warpSums[ threadIdx.x - stride ] ); + __syncthreads(); + + /**** + * Shift the warp prefix-sums. + */ + if( warpIdx > 0 ) + volatileOperation( auxData[ threadIdx.x ], warpSums[ warpIdx - 1 ] ); + + /*** + * Store the result back in global memory. + */ + __syncthreads(); + idx = threadIdx.x; + while( idx < elementsInBlock && blockOffset + idx < size ) + { + const Index chunkIdx = idx / chunkSize; + DataType chunkShift( zero ); + if( chunkIdx > 0 ) + chunkShift = auxData[ chunkIdx - 1 ]; + operation( sharedData[ Devices::Cuda::getInterleaving( idx ) ], chunkShift ); + output[ blockOffset + idx ] = sharedData[ Devices::Cuda::getInterleaving( idx ) ]; + idx += blockDim.x; + } + __syncthreads(); + + if( threadIdx.x == 0 ) + { + if( prefixSumType == PrefixSumType::exclusive ) + { + DataType aux = zero; + operation( aux, sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ] ); + operation( aux, sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock ) ] ); + auxArray[ blockIdx.x ] = aux; + } + else + auxArray[ blockIdx.x ] = sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ]; + } +} + +template< typename DataType, + typename Operation, + typename Index > +__global__ void +cudaSecondPhaseBlockPrefixSum( 
Operation operation, + const Index size, + const Index elementsInBlock, + DataType gridShift, + const DataType* auxArray, + DataType* data ) +{ + if( blockIdx.x > 0 ) + { + operation( gridShift, auxArray[ blockIdx.x - 1 ] ); + + const Index readOffset = blockIdx.x * elementsInBlock; + Index readIdx = threadIdx.x; + while( readIdx < elementsInBlock && readOffset + readIdx < size ) + { + operation( data[ readIdx + readOffset ], gridShift ); + readIdx += blockDim.x; + } + } +} + +template< typename DataType, + typename Operation, + typename VolatileOperation, + typename Index > +void +cudaRecursivePrefixSum( const PrefixSumType prefixSumType, + Operation& operation, + VolatileOperation& volatileOperation, + const DataType& zero, + const Index size, + const Index blockSize, + const Index elementsInBlock, + const DataType gridShift, + const DataType* input, + DataType *output ) +{ + const Index numberOfBlocks = roundUpDivision( size, elementsInBlock ); + const Index auxArraySize = numberOfBlocks * sizeof( DataType ); + + Array< DataType, Devices::Cuda > auxArray1, auxArray2; + auxArray1.setSize( auxArraySize ); + auxArray2.setSize( auxArraySize ); + + /**** + * Setup block and grid size. + */ + dim3 cudaBlockSize( 0 ), cudaGridSize( 0 ); + cudaBlockSize.x = blockSize; + cudaGridSize.x = roundUpDivision( size, elementsInBlock ); + + /**** + * Run the kernel. + */ + const std::size_t sharedDataSize = elementsInBlock + + elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2; + const std::size_t sharedMemory = ( sharedDataSize + blockSize + Devices::Cuda::getWarpSize() ) * sizeof( DataType ); + cudaFirstPhaseBlockPrefixSum<<< cudaGridSize, cudaBlockSize, sharedMemory >>> + ( prefixSumType, + operation, + volatileOperation, + zero, + size, + elementsInBlock, + input, + output, + auxArray1.getData() ); + TNL_CHECK_CUDA_DEVICE; + + /*** + * In auxArray1 there is now a sum of numbers in each block. 
+ * We must compute prefix-sum of auxArray1 and then shift + * each block. + */ + if( numberOfBlocks > 1 ) + cudaRecursivePrefixSum( PrefixSumType::inclusive, + operation, + volatileOperation, + zero, + numberOfBlocks, + blockSize, + elementsInBlock, + gridShift, + auxArray1.getData(), + auxArray2.getData() ); + + cudaSecondPhaseBlockPrefixSum<<< cudaGridSize, cudaBlockSize >>> + ( operation, + size, + elementsInBlock, + gridShift, + auxArray2.getData(), + output ); + TNL_CHECK_CUDA_DEVICE; +} + + +template< typename DataType, + typename Operation, + typename VolatileOperation, + typename Index > +void +cudaGridPrefixSum( PrefixSumType prefixSumType, + Operation& operation, + VolatileOperation& volatileOperation, + const DataType& zero, + const Index size, + const Index blockSize, + const Index elementsInBlock, + const DataType *deviceInput, + DataType *deviceOutput, + DataType& gridShift ) +{ + cudaRecursivePrefixSum( prefixSumType, + operation, + volatileOperation, + zero, + size, + blockSize, + elementsInBlock, + gridShift, + deviceInput, + deviceOutput ); + + cudaMemcpy( &gridShift, + &deviceOutput[ size - 1 ], + sizeof( DataType ), + cudaMemcpyDeviceToHost ); + TNL_CHECK_CUDA_DEVICE; +} + +///// +// deviceInput and deviceOutput can be the same +template< typename DataType, + typename Operation, + typename VolatileOperation, + typename Index > +void +cudaPrefixSum( const Index size, + const Index blockSize, + const DataType *deviceInput, + DataType* deviceOutput, + Operation& operation, + VolatileOperation& volatileOperation, + const DataType& zero, + const PrefixSumType prefixSumType ) +{ + /**** + * Compute the number of grids + */ + const Index elementsInBlock = 8 * blockSize; + const Index numberOfBlocks = roundUpDivision( size, elementsInBlock ); + const auto maxGridSize = Devices::Cuda::getMaxGridSize(); + const Index numberOfGrids = Devices::Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize ); + Array< DataType, Devices::Host, Index > gridShifts( 
numberOfGrids ); + gridShifts = zero; + + /**** + * Loop over all grids. + */ + for( Index gridIdx = 0; gridIdx < numberOfGrids; gridIdx++ ) + { + /**** + * Compute current grid size and size of data to be scanned + */ + const Index gridOffset = gridIdx * maxGridSize * elementsInBlock; + Index currentSize = size - gridOffset; + if( currentSize / elementsInBlock > maxGridSize ) + currentSize = maxGridSize * elementsInBlock; + + cudaGridPrefixSum( gridIdx == 0 ? prefixSumType : PrefixSumType::inclusive, + operation, + volatileOperation, + zero, + currentSize, + blockSize, + elementsInBlock, + &deviceInput[ gridOffset ], + &deviceOutput[ gridOffset ], + gridShifts[ gridIdx ] ); + } + + //gridShifts.computeExclusivePrefixSum(); + DataType aux( gridShifts[ 0 ] ); + gridShifts[ 0 ] = zero; + for( Index i = 1; i < numberOfGrids; i++ ) + { + DataType x = gridShifts[ i ]; + gridShifts[ i ] = aux; + operation( aux, x ); + } + + for( Index gridIdx = 1; gridIdx < numberOfGrids; gridIdx ++ ) + { + const Index gridOffset = gridIdx * maxGridSize * elementsInBlock; + Index currentSize = size - gridOffset; + if( currentSize / elementsInBlock > maxGridSize ) + currentSize = maxGridSize * elementsInBlock; + //ArrayView< DataType, Devices::Cuda, Index > v( &deviceOutput[ gridOffset ], currentSize ); + const auto g = gridShifts[ gridIdx ]; + auto shift = [=] __cuda_callable__ ( Index i ) { deviceOutput[ gridOffset + i ] += g; }; + ParallelFor< Devices::Cuda >::exec( ( Index ) 0, currentSize, shift ); + } +} +#endif } // namespace Algorithms } // namespace Containers } // namespace TNL -#include + diff --git a/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h b/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h deleted file mode 100644 index d8a2a0a45..000000000 --- a/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h +++ /dev/null @@ -1,349 +0,0 @@ -/*************************************************************************** - cuda-prefix-sum_impl.h - description - 
------------------- - begin : Jan 18, 2014 - copyright : (C) 2014 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include - -#include -#include -#include -#include -#include - -#ifdef HAVE_CUDA - -namespace TNL { -namespace Containers { -namespace Algorithms { - -template< typename DataType, - typename Operation, - typename VolatileOperation, - typename Index > -__global__ void -cudaFirstPhaseBlockPrefixSum( const PrefixSumType prefixSumType, - Operation operation, - VolatileOperation volatileOperation, - const DataType zero, - const Index size, - const Index elementsInBlock, - const DataType* input, - DataType* output, - DataType* auxArray ) -{ - DataType* sharedData = TNL::Devices::Cuda::getSharedMemory< DataType >(); - volatile DataType* auxData = &sharedData[ elementsInBlock + elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2 ]; - volatile DataType* warpSums = &auxData[ blockDim.x ]; - - const Index lastElementIdx = size - blockIdx.x * elementsInBlock; - const Index lastElementInBlock = TNL::min( lastElementIdx, elementsInBlock ); - - /*** - * Load data into the shared memory. - */ - const Index blockOffset = blockIdx.x * elementsInBlock; - Index idx = threadIdx.x; - if( prefixSumType == PrefixSumType::exclusive ) - { - if( idx == 0 ) - sharedData[ 0 ] = zero; //operation.initialValue(); - while( idx < elementsInBlock && blockOffset + idx < size ) - { - sharedData[ Devices::Cuda::getInterleaving( idx + 1 ) ] = input[ blockOffset + idx ]; - idx += blockDim.x; - } - } - else - while( idx < elementsInBlock && blockOffset + idx < size ) - { - sharedData[ Devices::Cuda::getInterleaving( idx ) ] = input[ blockOffset + idx ]; - idx += blockDim.x; - } - - /*** - * Perform the sequential prefix-sum. 
- */ - __syncthreads(); - const int chunkSize = elementsInBlock / blockDim.x; - const int chunkOffset = threadIdx.x * chunkSize; - const int numberOfChunks = roundUpDivision( lastElementInBlock, chunkSize ); - - if( chunkOffset < lastElementInBlock ) - { - auxData[ threadIdx.x ] = - sharedData[ Devices::Cuda::getInterleaving( chunkOffset ) ]; - } - - Index chunkPointer( 1 ); - while( chunkPointer < chunkSize && - chunkOffset + chunkPointer < lastElementInBlock ) - { - operation( sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ], - sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer - 1 ) ] ); - auxData[ threadIdx.x ] = - sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ]; - chunkPointer++; - } - - /*** - * Perform the parallel prefix-sum inside warps. - */ - const int threadInWarpIdx = threadIdx.x % Devices::Cuda::getWarpSize(); - const int warpIdx = threadIdx.x / Devices::Cuda::getWarpSize(); - for( int stride = 1; stride < Devices::Cuda::getWarpSize(); stride *= 2 ) - if( threadInWarpIdx >= stride && threadIdx.x < numberOfChunks ) - volatileOperation( auxData[ threadIdx.x ], auxData[ threadIdx.x - stride ] ); - - if( threadInWarpIdx == Devices::Cuda::getWarpSize() - 1 ) - warpSums[ warpIdx ] = auxData[ threadIdx.x ]; - __syncthreads(); - - /**** - * Compute prefix-sum of warp sums using one warp - */ - if( warpIdx == 0 ) - for( int stride = 1; stride < Devices::Cuda::getWarpSize(); stride *= 2 ) - if( threadInWarpIdx >= stride ) - volatileOperation( warpSums[ threadIdx.x ], warpSums[ threadIdx.x - stride ] ); - __syncthreads(); - - /**** - * Shift the warp prefix-sums. - */ - if( warpIdx > 0 ) - volatileOperation( auxData[ threadIdx.x ], warpSums[ warpIdx - 1 ] ); - - /*** - * Store the result back in global memory. 
- */ - __syncthreads(); - idx = threadIdx.x; - while( idx < elementsInBlock && blockOffset + idx < size ) - { - const Index chunkIdx = idx / chunkSize; - DataType chunkShift( zero ); //operation.initialValue() ); - if( chunkIdx > 0 ) - chunkShift = auxData[ chunkIdx - 1 ]; - operation( sharedData[ Devices::Cuda::getInterleaving( idx ) ], chunkShift ); - output[ blockOffset + idx ] = sharedData[ Devices::Cuda::getInterleaving( idx ) ]; - idx += blockDim.x; - } - __syncthreads(); - - if( threadIdx.x == 0 ) - { - if( prefixSumType == PrefixSumType::exclusive ) - { - /*auxArray[ blockIdx.x ] = operation.commonReduction( Devices::Cuda::getInterleaving( lastElementInBlock - 1 ), - Devices::Cuda::getInterleaving( lastElementInBlock ), - sharedData );*/ - DataType aux = zero; //operation.initialValue(); - operation( aux, sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ] ); - operation( aux, sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock ) ] ); - auxArray[ blockIdx.x ] = aux; - } - else - auxArray[ blockIdx.x ] = sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ]; - } -} - -template< typename DataType, - typename Operation, - typename Index > -__global__ void -cudaSecondPhaseBlockPrefixSum( Operation operation, - const Index size, - const Index elementsInBlock, - DataType gridShift, - const DataType* auxArray, - DataType* data ) -{ - if( blockIdx.x > 0 ) - { - operation( gridShift, auxArray[ blockIdx.x - 1 ] ); - - const Index readOffset = blockIdx.x * elementsInBlock; - Index readIdx = threadIdx.x; - while( readIdx < elementsInBlock && readOffset + readIdx < size ) - { - operation( data[ readIdx + readOffset ], gridShift ); - readIdx += blockDim.x; - } - } -} - - -template< typename DataType, - typename Operation, - typename VolatileOperation, - typename Index > -void -cudaRecursivePrefixSum( const PrefixSumType prefixSumType, - Operation& operation, - VolatileOperation& volatileOperation, - const DataType& zero, - 
const Index size, - const Index blockSize, - const Index elementsInBlock, - const DataType gridShift, - const DataType* input, - DataType *output ) -{ - const Index numberOfBlocks = roundUpDivision( size, elementsInBlock ); - const Index auxArraySize = numberOfBlocks * sizeof( DataType ); - - Array< DataType, Devices::Cuda > auxArray1, auxArray2; - auxArray1.setSize( auxArraySize ); - auxArray2.setSize( auxArraySize ); - - /**** - * Setup block and grid size. - */ - dim3 cudaBlockSize( 0 ), cudaGridSize( 0 ); - cudaBlockSize.x = blockSize; - cudaGridSize.x = roundUpDivision( size, elementsInBlock ); - - /**** - * Run the kernel. - */ - const std::size_t sharedDataSize = elementsInBlock + - elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2; - const std::size_t sharedMemory = ( sharedDataSize + blockSize + Devices::Cuda::getWarpSize() ) * sizeof( DataType ); - cudaFirstPhaseBlockPrefixSum<<< cudaGridSize, cudaBlockSize, sharedMemory >>> - ( prefixSumType, - operation, - volatileOperation, - zero, - size, - elementsInBlock, - input, - output, - auxArray1.getData() ); - TNL_CHECK_CUDA_DEVICE; - - /*** - * In auxArray1 there is now a sum of numbers in each block. - * We must compute prefix-sum of auxArray1 and then shift - * each block. 
- */ - if( numberOfBlocks > 1 ) - cudaRecursivePrefixSum( PrefixSumType::inclusive, - operation, - volatileOperation, - zero, - numberOfBlocks, - blockSize, - elementsInBlock, - gridShift, - auxArray1.getData(), - auxArray2.getData() ); - - cudaSecondPhaseBlockPrefixSum<<< cudaGridSize, cudaBlockSize >>> - ( operation, - size, - elementsInBlock, - gridShift, - auxArray2.getData(), - output ); - TNL_CHECK_CUDA_DEVICE; -} - - -template< typename DataType, - typename Operation, - typename VolatileOperation, - typename Index > -void -cudaGridPrefixSum( PrefixSumType prefixSumType, - Operation& operation, - VolatileOperation& volatileOperation, - const DataType& zero, - const Index size, - const Index blockSize, - const Index elementsInBlock, - const DataType *deviceInput, - DataType *deviceOutput, - DataType& gridShift ) -{ - cudaRecursivePrefixSum( prefixSumType, - operation, - volatileOperation, - zero, - size, - blockSize, - elementsInBlock, - gridShift, - deviceInput, - deviceOutput ); - - cudaMemcpy( &gridShift, - &deviceOutput[ size - 1 ], - sizeof( DataType ), - cudaMemcpyDeviceToHost ); - TNL_CHECK_CUDA_DEVICE; -} - -///// -// deviceInput and deviceOutput can be the same -template< typename DataType, - typename Operation, - typename VolatileOperation, - typename Index > -void -cudaPrefixSum( const Index size, - const Index blockSize, - const DataType *deviceInput, - DataType* deviceOutput, - Operation& operation, - VolatileOperation& volatileOperation, - const DataType& zero, - const PrefixSumType prefixSumType ) -{ - /**** - * Compute the number of grids - */ - const Index elementsInBlock = 8 * blockSize; - const Index numberOfBlocks = roundUpDivision( size, elementsInBlock ); - const auto maxGridSize = Devices::Cuda::getMaxGridSize(); - const Index numberOfGrids = Devices::Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize ); - - /**** - * Loop over all grids. 
- */ - DataType gridShift = zero; //operation.initialValue(); - for( Index gridIdx = 0; gridIdx < numberOfGrids; gridIdx++ ) - { - /**** - * Compute current grid size and size of data to be scanned - */ - const Index gridOffset = gridIdx * maxGridSize * elementsInBlock; - Index currentSize = size - gridOffset; - if( currentSize / elementsInBlock > maxGridSize ) - currentSize = maxGridSize * elementsInBlock; - - cudaGridPrefixSum( prefixSumType, - operation, - volatileOperation, - zero, - currentSize, - blockSize, - elementsInBlock, - &deviceInput[ gridOffset ], - &deviceOutput[ gridOffset ], - gridShift ); - } -} - -} // namespace Algorithms -} // namespace Containers -} // namespace TNL - -#endif diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index bc468943f..46a10c254 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -109,17 +109,17 @@ public: template< typename VectorExpression > VectorView& operator=( const VectorExpression& expression ); - template< typename Vector > - VectorView& operator-=( const Vector& vector ); + template< typename VectorExpression > + VectorView& operator-=( const VectorExpression& expression ); - template< typename Vector > - VectorView& operator+=( const Vector& vector ); + template< typename VectorExpression > + VectorView& operator+=( const VectorExpression& expression ); - template< typename Scalar > - VectorView& operator*=( Scalar c ); + template< typename VectorExpression > + VectorView& operator*=( const VectorExpression& expression ); - template< typename Scalar > - VectorView& operator/=( Scalar c ); + template< typename VectorExpression > + VectorView& operator/=( const VectorExpression& expression ); template< typename Real_, typename Device_, typename Index_ > bool operator==( const VectorView< Real_, Device_, Index_ >& v ); diff --git a/src/TNL/Containers/VectorView_impl.h b/src/TNL/Containers/VectorView_impl.h index 23e79c125..37be6d048 100644 --- 
a/src/TNL/Containers/VectorView_impl.h +++ b/src/TNL/Containers/VectorView_impl.h @@ -130,48 +130,52 @@ VectorView< Real, Device, Index >::operator=( const VectorExpression& expression template< typename Real, typename Device, typename Index > - template< typename Vector > + template< typename VectorExpression > VectorView< Real, Device, Index >& VectorView< Real, Device, Index >:: -operator-=( const Vector& vector ) +operator-=( const VectorExpression& expression ) { - addVector( vector, -1.0 ); + //addVector( vector, -1.0 ); + Algorithms::VectorSubtraction< VectorView< Real, Device, Index >, VectorExpression >::subtraction( *this, expression ); return *this; } template< typename Real, typename Device, typename Index > - template< typename Vector > + template< typename VectorExpression > VectorView< Real, Device, Index >& VectorView< Real, Device, Index >:: -operator+=( const Vector& vector ) +operator+=( const VectorExpression& expression ) { - addVector( vector ); + //addVector( vector ); + Algorithms::VectorAddition< VectorView< Real, Device, Index >, VectorExpression >::addition( *this, expression ); return *this; } template< typename Real, typename Device, typename Index > - template< typename Scalar > + template< typename VectorExpression > VectorView< Real, Device, Index >& VectorView< Real, Device, Index >:: -operator*=( Scalar c ) +operator*=( const VectorExpression& expression ) { - Algorithms::VectorOperations< Device >::vectorScalarMultiplication( *this, c ); + //Algorithms::VectorOperations< Device >::vectorScalarMultiplication( *this, c ); + Algorithms::VectorMultiplication< VectorView< Real, Device, Index >, VectorExpression >::multiplication( *this, expression ); return *this; } template< typename Real, typename Device, typename Index > - template< typename Scalar > + template< typename VectorExpression > VectorView< Real, Device, Index >& VectorView< Real, Device, Index >:: -operator/=( Scalar c ) +operator/=( const VectorExpression& expression ) 
{ - Algorithms::VectorOperations< Device >::vectorScalarMultiplication( *this, 1.0 / c ); + //Algorithms::VectorOperations< Device >::vectorScalarMultiplication( *this, 1.0 / c ); + Algorithms::VectorDivision< VectorView< Real, Device, Index >, VectorExpression >::division( *this, expression ); return *this; } diff --git a/src/UnitTests/Containers/VectorTest-4.h b/src/UnitTests/Containers/VectorTest-4.h index 0505a7399..e85a67a3c 100644 --- a/src/UnitTests/Containers/VectorTest-4.h +++ b/src/UnitTests/Containers/VectorTest-4.h @@ -161,6 +161,62 @@ TYPED_TEST( VectorTest, prefixSum ) EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i ); } +/*** + * The following test takes too long - 6 min approx. + */ +/*TYPED_TEST( VectorTest, longPrefixSum ) +{ + using VectorType = typename TestFixture::VectorType; + using VectorOperations = typename TestFixture::VectorOperations; + using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + using DeviceType = typename VectorType::DeviceType; + using IndexType = typename VectorType::IndexType; + using HostVectorType = Vector< RealType, Devices::Host, IndexType >; + using HostViewType = VectorView< RealType, Devices::Host, IndexType >; + + ///// + // This is a test of prefix sum on long vectors to check if the correction + // across multiple CUDA grids is correct + if( std::is_same< DeviceType, Devices::Cuda >::value && + ! std::is_same< IndexType, short >::value && + ! 
std::is_same< RealType, float >::value ) + { + const IndexType size = 134217728+100; + + VectorType v( size ); + ViewType v_view( v ); + + HostVectorType host_v( size ), host_copy( size ); + HostViewType host_v_view( host_v ); + + v = 0; + host_v = 0; + v.computePrefixSum(); + host_v.computePrefixSum(); + host_copy = v; + for( IndexType i = 0; i < size; i ++ ) + EXPECT_EQ( host_copy[ i ], host_v[ i ] ); + + setOscilatingLinearSequence( v ); + setOscilatingLinearSequence( host_v ); + v.computePrefixSum(); + host_v.computePrefixSum(); + host_copy = v; + for( IndexType i = 0; i < size; i ++ ) + EXPECT_EQ( host_copy[ i ], host_v[ i ] ); + + + setOscilatingConstantSequence( v, 1 ); + setOscilatingConstantSequence( host_v, 1 ); + v_view.computePrefixSum(); + host_v_view.computePrefixSum(); + host_copy = v; + for( IndexType i = 0; i < size; i ++ ) + EXPECT_EQ( host_copy[ i ], host_v[ i ] ); + } +}*/ + TYPED_TEST( VectorTest, exclusivePrefixSum ) { using VectorType = typename TestFixture::VectorType; @@ -211,6 +267,7 @@ void setupFlags( FlagsView& f ) f.evaluate( f1 ); } +/* TYPED_TEST( VectorTest, segmentedPrefixSum ) { using VectorType = typename TestFixture::VectorType; @@ -237,7 +294,7 @@ TYPED_TEST( VectorTest, segmentedPrefixSum ) for( int i = 0; i < size; i++ ) EXPECT_EQ( v.getElement( i ), 0 ); flags_view = flags_copy; - + v = 1; v.computeSegmentedPrefixSum( flags_view ); for( int i = 0; i < size; i++ ) @@ -254,13 +311,13 @@ TYPED_TEST( VectorTest, segmentedPrefixSum ) EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i ); } flags_view = flags_copy; - + v_view = 0; v_view.computeSegmentedPrefixSum( flags_view ); for( int i = 0; i < size; i++ ) EXPECT_EQ( v_view.getElement( i ), 0 ); flags_view = flags_copy; - + v_view = 1; v_view.computeSegmentedPrefixSum( flags_view ); for( int i = 0; i < size; i++ ) @@ -278,7 +335,7 @@ TYPED_TEST( VectorTest, segmentedPrefixSum ) EXPECT_EQ( v_view.getElement( i ) - v_view.getElement( i - 1 ), i ); } } - +*/ TYPED_TEST( 
VectorTest, abs ) { diff --git a/src/UnitTests/Containers/VectorTestSetup.h b/src/UnitTests/Containers/VectorTestSetup.h index 870db0f25..f53a6e970 100644 --- a/src/UnitTests/Containers/VectorTestSetup.h +++ b/src/UnitTests/Containers/VectorTestSetup.h @@ -43,6 +43,27 @@ void setConstantSequence( Vector& deviceVector, deviceVector.setValue( v ); } +template< typename Vector > +void setOscilatingLinearSequence( Vector& deviceVector ) +{ + typename Vector::HostType a; + a.setLike( deviceVector ); + for( int i = 0; i < a.getSize(); i++ ) + a[ i ] = i % 30 - 15; + deviceVector = a; +} + +template< typename Vector > +void setOscilatingConstantSequence( Vector& deviceVector, + typename Vector::RealType v ) +{ + typename Vector::HostType a; + a.setLike( deviceVector ); + for( int i = 0; i < a.getSize(); i++ ) + a[ i ] = TNL::sign( i % 30 - 15 ); + deviceVector = a; +} + template< typename Vector > void setNegativeLinearSequence( Vector& deviceVector ) { -- GitLab From 0278d7e62319b4fd64a21f4b9d734c703914734d Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 6 Jun 2019 13:53:27 +0200 Subject: [PATCH 53/93] Refactoring prefix sum. 
--- .../Algorithms/CudaPrefixSumKernel.h | 355 +++++++++++++++++ src/TNL/Containers/Algorithms/PrefixSum.hpp | 34 +- .../Algorithms/VectorOperationsCuda_impl.h | 2 +- .../Containers/Algorithms/cuda-prefix-sum.h | 375 ------------------ src/UnitTests/Containers/VectorTest-4.h | 45 ++- 5 files changed, 408 insertions(+), 403 deletions(-) create mode 100644 src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h delete mode 100644 src/TNL/Containers/Algorithms/cuda-prefix-sum.h diff --git a/src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h b/src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h new file mode 100644 index 000000000..966fe2d74 --- /dev/null +++ b/src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h @@ -0,0 +1,355 @@ +/*************************************************************************** + cuda-prefix-sum.h - description + ------------------- + begin : Jan 18, 2014 + copyright : (C) 2014 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include + +#include +#include +#include +#include +#include + +namespace TNL { +namespace Containers { +namespace Algorithms { + +enum class PrefixSumType { + exclusive, + inclusive +}; + +enum class PrefixSumSegmentation { + nonsegmented, + segmented +}; + +#ifdef HAVE_CUDA + +template< typename Real, + typename Operation, + typename VolatileOperation, + typename Index > +__global__ void +cudaFirstPhaseBlockPrefixSum( const PrefixSumType prefixSumType, + Operation operation, + VolatileOperation volatileOperation, + const Real zero, + const Index size, + const Index elementsInBlock, + const Real* input, + Real* output, + Real* auxArray, + const Real gridShift ) +{ + Real* sharedData = TNL::Devices::Cuda::getSharedMemory< Real >(); + volatile Real* auxData = &sharedData[ elementsInBlock + elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2 ]; + volatile 
Real* warpSums = &auxData[ blockDim.x ]; + + const Index lastElementIdx = size - blockIdx.x * elementsInBlock; + const Index lastElementInBlock = TNL::min( lastElementIdx, elementsInBlock ); + + /*** + * Load data into the shared memory. + */ + const Index blockOffset = blockIdx.x * elementsInBlock; + Index idx = threadIdx.x; + if( prefixSumType == PrefixSumType::exclusive ) + { + if( idx == 0 ) + sharedData[ 0 ] = zero; + while( idx < elementsInBlock && blockOffset + idx < size ) + { + sharedData[ Devices::Cuda::getInterleaving( idx + 1 ) ] = input[ blockOffset + idx ]; + idx += blockDim.x; + } + } + else + { + while( idx < elementsInBlock && blockOffset + idx < size ) + { + sharedData[ Devices::Cuda::getInterleaving( idx ) ] = input[ blockOffset + idx ]; + idx += blockDim.x; + } + } + if( blockIdx.x == 0 && threadIdx.x == 0 ) + operation( sharedData[ 0 ], gridShift ); + + /*** + * Perform the sequential prefix-sum. + */ + __syncthreads(); + const int chunkSize = elementsInBlock / blockDim.x; + const int chunkOffset = threadIdx.x * chunkSize; + const int numberOfChunks = roundUpDivision( lastElementInBlock, chunkSize ); + + if( chunkOffset < lastElementInBlock ) + { + auxData[ threadIdx.x ] = + sharedData[ Devices::Cuda::getInterleaving( chunkOffset ) ]; + } + + Index chunkPointer( 1 ); + while( chunkPointer < chunkSize && + chunkOffset + chunkPointer < lastElementInBlock ) + { + operation( sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ], + sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer - 1 ) ] ); + auxData[ threadIdx.x ] = + sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ]; + chunkPointer++; + } + + /*** + * Perform the parallel prefix-sum inside warps. 
+ */ + const int threadInWarpIdx = threadIdx.x % Devices::Cuda::getWarpSize(); + const int warpIdx = threadIdx.x / Devices::Cuda::getWarpSize(); + for( int stride = 1; stride < Devices::Cuda::getWarpSize(); stride *= 2 ) + if( threadInWarpIdx >= stride && threadIdx.x < numberOfChunks ) + volatileOperation( auxData[ threadIdx.x ], auxData[ threadIdx.x - stride ] ); + + if( threadInWarpIdx == Devices::Cuda::getWarpSize() - 1 ) + warpSums[ warpIdx ] = auxData[ threadIdx.x ]; + __syncthreads(); + + /**** + * Compute prefix-sum of warp sums using one warp + */ + if( warpIdx == 0 ) + for( int stride = 1; stride < Devices::Cuda::getWarpSize(); stride *= 2 ) + if( threadInWarpIdx >= stride ) + volatileOperation( warpSums[ threadIdx.x ], warpSums[ threadIdx.x - stride ] ); + __syncthreads(); + + /**** + * Shift the warp prefix-sums. + */ + if( warpIdx > 0 ) + volatileOperation( auxData[ threadIdx.x ], warpSums[ warpIdx - 1 ] ); + + /*** + * Store the result back in global memory. + */ + __syncthreads(); + idx = threadIdx.x; + while( idx < elementsInBlock && blockOffset + idx < size ) + { + const Index chunkIdx = idx / chunkSize; + Real chunkShift( zero ); + if( chunkIdx > 0 ) + chunkShift = auxData[ chunkIdx - 1 ]; + operation( sharedData[ Devices::Cuda::getInterleaving( idx ) ], chunkShift ); + output[ blockOffset + idx ] = sharedData[ Devices::Cuda::getInterleaving( idx ) ]; + idx += blockDim.x; + } + __syncthreads(); + + if( threadIdx.x == 0 ) + { + if( prefixSumType == PrefixSumType::exclusive ) + { + Real aux = zero; + operation( aux, sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ] ); + operation( aux, sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock ) ] ); + auxArray[ blockIdx.x ] = aux; + } + else + auxArray[ blockIdx.x ] = sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ]; + } +} + +template< typename Real, + typename Operation, + typename Index > +__global__ void +cudaSecondPhaseBlockPrefixSum( Operation 
operation, + const Index size, + const Index elementsInBlock, + Real gridShift, + const Real* auxArray, + Real* data ) +{ + if( blockIdx.x > 0 ) + { + const Real shift = auxArray[ blockIdx.x - 1 ]; + //operation( gridShift, shift ); //auxArray[ blockIdx.x - 1 ] ); + + const Index readOffset = blockIdx.x * elementsInBlock; + Index readIdx = threadIdx.x; + while( readIdx < elementsInBlock && readOffset + readIdx < size ) + { + operation( data[ readIdx + readOffset ], shift ); + readIdx += blockDim.x; + } + } +} + +template< PrefixSumType prefixSumType, + PrefixSumSegmentation segmentation, + typename Real, + typename Index > +struct CudaPrefixSumKernelLauncher +{ + template< typename Operation, + typename VolatileOperation > + static void + cudaRecursivePrefixSum( PrefixSumType prefixSumType_, + Operation& operation, + VolatileOperation& volatileOperation, + const Real& zero, + const Index size, + const Index blockSize, + const Index elementsInBlock, + Real& gridShift, + const Real* input, + Real* output ) + { + const Index numberOfBlocks = roundUpDivision( size, elementsInBlock ); + const Index auxArraySize = numberOfBlocks; + + Array< Real, Devices::Cuda > auxArray1, auxArray2; + auxArray1.setSize( auxArraySize ); + auxArray2.setSize( auxArraySize ); + + /**** + * Setup block and grid size. + */ + dim3 cudaBlockSize( 0 ), cudaGridSize( 0 ); + cudaBlockSize.x = blockSize; + cudaGridSize.x = roundUpDivision( size, elementsInBlock ); + + /**** + * Run the kernel. 
+ */ + const std::size_t sharedDataSize = elementsInBlock + + elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2; + const std::size_t sharedMemory = ( sharedDataSize + blockSize + Devices::Cuda::getWarpSize() ) * sizeof( Real ); + cudaFirstPhaseBlockPrefixSum<<< cudaGridSize, cudaBlockSize, sharedMemory >>> + ( prefixSumType_, + operation, + volatileOperation, + zero, + size, + elementsInBlock, + input, + output, + auxArray1.getData(), + gridShift ); + TNL_CHECK_CUDA_DEVICE; + + + //std::cerr << " auxArray1 = " << auxArray1 << std::endl; + /*** + * In auxArray1 there is now a sum of numbers in each block. + * We must compute prefix-sum of auxArray1 and then shift + * each block. + */ + Real gridShift2 = zero; + if( numberOfBlocks > 1 ) + cudaRecursivePrefixSum( PrefixSumType::inclusive, + operation, + volatileOperation, + zero, + numberOfBlocks, + blockSize, + elementsInBlock, + gridShift2, + auxArray1.getData(), + auxArray2.getData() ); + + //std::cerr << " auxArray2 = " << auxArray2 << std::endl; + cudaSecondPhaseBlockPrefixSum<<< cudaGridSize, cudaBlockSize >>> + ( operation, + size, + elementsInBlock, + gridShift, + auxArray2.getData(), + output ); + TNL_CHECK_CUDA_DEVICE; + + cudaMemcpy( &gridShift, + &auxArray2[ auxArraySize - 1 ], + sizeof( Real ), + cudaMemcpyDeviceToHost ); + //std::cerr << "gridShift = " << gridShift << std::endl; + TNL_CHECK_CUDA_DEVICE; + } + + /**** + * \brief Starts prefix sum in CUDA. 
+ * + * \tparam Operation operation to be performed on particular elements - addition usually + * \tparam VolatileOperation - volatile version of Operation + * \param size is number of elements to be scanned + * \param blockSize is CUDA block size + * \param deviceInput is pointer to input data on GPU + * \param deviceOutput is pointer to resulting array, can be the same as input + * \param operation is instance of Operation + * \param volatileOperation is instance of VolatileOperation + * \param zero is neutral element for given Operation + */ + template< typename Operation, + typename VolatileOperation > + static void + start( const Index size, + const Index blockSize, + const Real *deviceInput, + Real* deviceOutput, + Operation& operation, + VolatileOperation& volatileOperation, + const Real& zero ) + { + /**** + * Compute the number of grids + */ + const Index elementsInBlock = 8 * blockSize; + const Index numberOfBlocks = roundUpDivision( size, elementsInBlock ); + const auto maxGridSize = 3; //Devices::Cuda::getMaxGridSize(); + const Index numberOfGrids = Devices::Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize ); + Real gridShift = zero; + //std::cerr << "numberOfgrids = " << numberOfGrids << std::endl; + + /**** + * Loop over all grids.
+ */ + for( Index gridIdx = 0; gridIdx < numberOfGrids; gridIdx++ ) + { + /**** + * Compute current grid size and size of data to be scanned + */ + const Index gridOffset = gridIdx * maxGridSize * elementsInBlock; + Index currentSize = size - gridOffset; + if( currentSize / elementsInBlock > maxGridSize ) + currentSize = maxGridSize * elementsInBlock; + + //std::cerr << "GridIdx = " << gridIdx << " grid size = " << currentSize << std::endl; + cudaRecursivePrefixSum( prefixSumType, + operation, + volatileOperation, + zero, + currentSize, + blockSize, + elementsInBlock, + gridShift, + &deviceInput[ gridOffset ], + &deviceOutput[ gridOffset ] ); + TNL_CHECK_CUDA_DEVICE; + } + } +}; +#endif + +} // namespace Algorithms +} // namespace Containers +} // namespace TNL + + diff --git a/src/TNL/Containers/Algorithms/PrefixSum.hpp b/src/TNL/Containers/Algorithms/PrefixSum.hpp index 30a39852b..58906b20f 100644 --- a/src/TNL/Containers/Algorithms/PrefixSum.hpp +++ b/src/TNL/Containers/Algorithms/PrefixSum.hpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include #ifdef CUDA_REDUCTION_PROFILING #include @@ -155,14 +155,14 @@ inclusive( Vector& v, using IndexType = typename Vector::IndexType; using IndexType = typename Vector::IndexType; #ifdef HAVE_CUDA - cudaPrefixSum( ( IndexType ) ( end - begin ), - ( IndexType ) 256, - &v[ begin ], - &v[ begin ], - reduction, - volatileReduction, - zero, - Algorithms::PrefixSumType::inclusive ); + CudaPrefixSumKernelLauncher< PrefixSumType::inclusive, PrefixSumSegmentation::nonsegmented, RealType, IndexType >::start( + ( IndexType ) ( end - begin ), + ( IndexType ) 256, + &v[ begin ], + &v[ begin ], + reduction, + volatileReduction, + zero ); #endif } @@ -184,14 +184,14 @@ exclusive( Vector& v, using IndexType = typename Vector::IndexType; using IndexType = typename Vector::IndexType; #ifdef HAVE_CUDA - cudaPrefixSum( ( IndexType ) ( end - begin ), - ( IndexType ) 256, - &v[ begin ], - &v[ begin ], - reduction, - 
volatileReduction, - zero, - Algorithms::PrefixSumType::exclusive ); + CudaPrefixSumKernelLauncher< PrefixSumType::exclusive, PrefixSumSegmentation::nonsegmented, RealType, IndexType>::start( + ( IndexType ) ( end - begin ), + ( IndexType ) 256, + &v[ begin ], + &v[ begin ], + reduction, + volatileReduction, + zero ); #endif } diff --git a/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h b/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h index e428148bc..cfd67d48e 100644 --- a/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h +++ b/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h @@ -12,7 +12,7 @@ #include #include -#include +#include namespace TNL { namespace Containers { diff --git a/src/TNL/Containers/Algorithms/cuda-prefix-sum.h b/src/TNL/Containers/Algorithms/cuda-prefix-sum.h deleted file mode 100644 index 4019423a9..000000000 --- a/src/TNL/Containers/Algorithms/cuda-prefix-sum.h +++ /dev/null @@ -1,375 +0,0 @@ -/*************************************************************************** - cuda-prefix-sum.h - description - ------------------- - begin : Jan 18, 2014 - copyright : (C) 2014 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include - -#include -#include -#include -#include -#include - -namespace TNL { -namespace Containers { -namespace Algorithms { - -enum class PrefixSumType -{ - exclusive, - inclusive -}; - -#ifdef HAVE_CUDA - -template< typename DataType, - typename Operation, - typename VolatileOperation, - typename Index > -__global__ void -cudaFirstPhaseBlockPrefixSum( const PrefixSumType prefixSumType, - Operation operation, - VolatileOperation volatileOperation, - const DataType zero, - const Index size, - const Index elementsInBlock, - const DataType* input, - DataType* output, - DataType* auxArray ) -{ - DataType* sharedData = 
TNL::Devices::Cuda::getSharedMemory< DataType >(); - volatile DataType* auxData = &sharedData[ elementsInBlock + elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2 ]; - volatile DataType* warpSums = &auxData[ blockDim.x ]; - - const Index lastElementIdx = size - blockIdx.x * elementsInBlock; - const Index lastElementInBlock = TNL::min( lastElementIdx, elementsInBlock ); - - /*** - * Load data into the shared memory. - */ - const Index blockOffset = blockIdx.x * elementsInBlock; - Index idx = threadIdx.x; - if( prefixSumType == PrefixSumType::exclusive ) - { - if( idx == 0 ) - sharedData[ 0 ] = zero; - while( idx < elementsInBlock && blockOffset + idx < size ) - { - sharedData[ Devices::Cuda::getInterleaving( idx + 1 ) ] = input[ blockOffset + idx ]; - idx += blockDim.x; - } - } - else - while( idx < elementsInBlock && blockOffset + idx < size ) - { - sharedData[ Devices::Cuda::getInterleaving( idx ) ] = input[ blockOffset + idx ]; - idx += blockDim.x; - } - - /*** - * Perform the sequential prefix-sum. - */ - __syncthreads(); - const int chunkSize = elementsInBlock / blockDim.x; - const int chunkOffset = threadIdx.x * chunkSize; - const int numberOfChunks = roundUpDivision( lastElementInBlock, chunkSize ); - - if( chunkOffset < lastElementInBlock ) - { - auxData[ threadIdx.x ] = - sharedData[ Devices::Cuda::getInterleaving( chunkOffset ) ]; - } - - Index chunkPointer( 1 ); - while( chunkPointer < chunkSize && - chunkOffset + chunkPointer < lastElementInBlock ) - { - operation( sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ], - sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer - 1 ) ] ); - auxData[ threadIdx.x ] = - sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ]; - chunkPointer++; - } - - /*** - * Perform the parallel prefix-sum inside warps. 
- */ - const int threadInWarpIdx = threadIdx.x % Devices::Cuda::getWarpSize(); - const int warpIdx = threadIdx.x / Devices::Cuda::getWarpSize(); - for( int stride = 1; stride < Devices::Cuda::getWarpSize(); stride *= 2 ) - if( threadInWarpIdx >= stride && threadIdx.x < numberOfChunks ) - volatileOperation( auxData[ threadIdx.x ], auxData[ threadIdx.x - stride ] ); - - if( threadInWarpIdx == Devices::Cuda::getWarpSize() - 1 ) - warpSums[ warpIdx ] = auxData[ threadIdx.x ]; - __syncthreads(); - - /**** - * Compute prefix-sum of warp sums using one warp - */ - if( warpIdx == 0 ) - for( int stride = 1; stride < Devices::Cuda::getWarpSize(); stride *= 2 ) - if( threadInWarpIdx >= stride ) - volatileOperation( warpSums[ threadIdx.x ], warpSums[ threadIdx.x - stride ] ); - __syncthreads(); - - /**** - * Shift the warp prefix-sums. - */ - if( warpIdx > 0 ) - volatileOperation( auxData[ threadIdx.x ], warpSums[ warpIdx - 1 ] ); - - /*** - * Store the result back in global memory. - */ - __syncthreads(); - idx = threadIdx.x; - while( idx < elementsInBlock && blockOffset + idx < size ) - { - const Index chunkIdx = idx / chunkSize; - DataType chunkShift( zero ); - if( chunkIdx > 0 ) - chunkShift = auxData[ chunkIdx - 1 ]; - operation( sharedData[ Devices::Cuda::getInterleaving( idx ) ], chunkShift ); - output[ blockOffset + idx ] = sharedData[ Devices::Cuda::getInterleaving( idx ) ]; - idx += blockDim.x; - } - __syncthreads(); - - if( threadIdx.x == 0 ) - { - if( prefixSumType == PrefixSumType::exclusive ) - { - DataType aux = zero; - operation( aux, sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ] ); - operation( aux, sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock ) ] ); - auxArray[ blockIdx.x ] = aux; - } - else - auxArray[ blockIdx.x ] = sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ]; - } -} - -template< typename DataType, - typename Operation, - typename Index > -__global__ void -cudaSecondPhaseBlockPrefixSum( 
Operation operation, - const Index size, - const Index elementsInBlock, - DataType gridShift, - const DataType* auxArray, - DataType* data ) -{ - if( blockIdx.x > 0 ) - { - operation( gridShift, auxArray[ blockIdx.x - 1 ] ); - - const Index readOffset = blockIdx.x * elementsInBlock; - Index readIdx = threadIdx.x; - while( readIdx < elementsInBlock && readOffset + readIdx < size ) - { - operation( data[ readIdx + readOffset ], gridShift ); - readIdx += blockDim.x; - } - } -} - -template< typename DataType, - typename Operation, - typename VolatileOperation, - typename Index > -void -cudaRecursivePrefixSum( const PrefixSumType prefixSumType, - Operation& operation, - VolatileOperation& volatileOperation, - const DataType& zero, - const Index size, - const Index blockSize, - const Index elementsInBlock, - const DataType gridShift, - const DataType* input, - DataType *output ) -{ - const Index numberOfBlocks = roundUpDivision( size, elementsInBlock ); - const Index auxArraySize = numberOfBlocks * sizeof( DataType ); - - Array< DataType, Devices::Cuda > auxArray1, auxArray2; - auxArray1.setSize( auxArraySize ); - auxArray2.setSize( auxArraySize ); - - /**** - * Setup block and grid size. - */ - dim3 cudaBlockSize( 0 ), cudaGridSize( 0 ); - cudaBlockSize.x = blockSize; - cudaGridSize.x = roundUpDivision( size, elementsInBlock ); - - /**** - * Run the kernel. - */ - const std::size_t sharedDataSize = elementsInBlock + - elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2; - const std::size_t sharedMemory = ( sharedDataSize + blockSize + Devices::Cuda::getWarpSize() ) * sizeof( DataType ); - cudaFirstPhaseBlockPrefixSum<<< cudaGridSize, cudaBlockSize, sharedMemory >>> - ( prefixSumType, - operation, - volatileOperation, - zero, - size, - elementsInBlock, - input, - output, - auxArray1.getData() ); - TNL_CHECK_CUDA_DEVICE; - - /*** - * In auxArray1 there is now a sum of numbers in each block. 
- * We must compute prefix-sum of auxArray1 and then shift - * each block. - */ - if( numberOfBlocks > 1 ) - cudaRecursivePrefixSum( PrefixSumType::inclusive, - operation, - volatileOperation, - zero, - numberOfBlocks, - blockSize, - elementsInBlock, - gridShift, - auxArray1.getData(), - auxArray2.getData() ); - - cudaSecondPhaseBlockPrefixSum<<< cudaGridSize, cudaBlockSize >>> - ( operation, - size, - elementsInBlock, - gridShift, - auxArray2.getData(), - output ); - TNL_CHECK_CUDA_DEVICE; -} - - -template< typename DataType, - typename Operation, - typename VolatileOperation, - typename Index > -void -cudaGridPrefixSum( PrefixSumType prefixSumType, - Operation& operation, - VolatileOperation& volatileOperation, - const DataType& zero, - const Index size, - const Index blockSize, - const Index elementsInBlock, - const DataType *deviceInput, - DataType *deviceOutput, - DataType& gridShift ) -{ - cudaRecursivePrefixSum( prefixSumType, - operation, - volatileOperation, - zero, - size, - blockSize, - elementsInBlock, - gridShift, - deviceInput, - deviceOutput ); - - cudaMemcpy( &gridShift, - &deviceOutput[ size - 1 ], - sizeof( DataType ), - cudaMemcpyDeviceToHost ); - TNL_CHECK_CUDA_DEVICE; -} - -///// -// deviceInput and deviceOutput can be the same -template< typename DataType, - typename Operation, - typename VolatileOperation, - typename Index > -void -cudaPrefixSum( const Index size, - const Index blockSize, - const DataType *deviceInput, - DataType* deviceOutput, - Operation& operation, - VolatileOperation& volatileOperation, - const DataType& zero, - const PrefixSumType prefixSumType ) -{ - /**** - * Compute the number of grids - */ - const Index elementsInBlock = 8 * blockSize; - const Index numberOfBlocks = roundUpDivision( size, elementsInBlock ); - const auto maxGridSize = Devices::Cuda::getMaxGridSize(); - const Index numberOfGrids = Devices::Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize ); - Array< DataType, Devices::Host, Index > gridShifts( 
numberOfGrids ); - gridShifts = zero; - - /**** - * Loop over all grids. - */ - for( Index gridIdx = 0; gridIdx < numberOfGrids; gridIdx++ ) - { - /**** - * Compute current grid size and size of data to be scanned - */ - const Index gridOffset = gridIdx * maxGridSize * elementsInBlock; - Index currentSize = size - gridOffset; - if( currentSize / elementsInBlock > maxGridSize ) - currentSize = maxGridSize * elementsInBlock; - - cudaGridPrefixSum( gridIdx == 0 ? prefixSumType : PrefixSumType::inclusive, - operation, - volatileOperation, - zero, - currentSize, - blockSize, - elementsInBlock, - &deviceInput[ gridOffset ], - &deviceOutput[ gridOffset ], - gridShifts[ gridIdx ] ); - } - - //gridShifts.computeExclusivePrefixSum(); - DataType aux( gridShifts[ 0 ] ); - gridShifts[ 0 ] = zero; - for( Index i = 1; i < numberOfGrids; i++ ) - { - DataType x = gridShifts[ i ]; - gridShifts[ i ] = aux; - operation( aux, x ); - } - - for( Index gridIdx = 1; gridIdx < numberOfGrids; gridIdx ++ ) - { - const Index gridOffset = gridIdx * maxGridSize * elementsInBlock; - Index currentSize = size - gridOffset; - if( currentSize / elementsInBlock > maxGridSize ) - currentSize = maxGridSize * elementsInBlock; - //ArrayView< DataType, Devices::Cuda, Index > v( &deviceOutput[ gridOffset ], currentSize ); - const auto g = gridShifts[ gridIdx ]; - auto shift = [=] __cuda_callable__ ( Index i ) { deviceOutput[ gridOffset + i ] += g; }; - ParallelFor< Devices::Cuda >::exec( ( Index ) 0, currentSize, shift ); - } -} -#endif - -} // namespace Algorithms -} // namespace Containers -} // namespace TNL - - diff --git a/src/UnitTests/Containers/VectorTest-4.h b/src/UnitTests/Containers/VectorTest-4.h index e85a67a3c..825409912 100644 --- a/src/UnitTests/Containers/VectorTest-4.h +++ b/src/UnitTests/Containers/VectorTest-4.h @@ -31,6 +31,7 @@ using namespace TNL::Arithmetics; // and large enough to require multiple CUDA blocks for reduction constexpr int VECTOR_TEST_SIZE = 5000; +/* TYPED_TEST( 
VectorTest, addVector ) { using VectorType = typename TestFixture::VectorType; @@ -161,14 +162,22 @@ TYPED_TEST( VectorTest, prefixSum ) EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i ); } + */ + /*** * The following test tekaes too long - 6 min approx. */ -/*TYPED_TEST( VectorTest, longPrefixSum ) +//TYPED_TEST( VectorTest, longPrefixSum ) +void Test() { - using VectorType = typename TestFixture::VectorType; - using VectorOperations = typename TestFixture::VectorOperations; - using ViewType = typename TestFixture::ViewType; + using VectorType = Containers::Vector< double, Devices::Cuda, int >; + using VectorOperations = Algorithms::VectorOperations< typename VectorType::DeviceType >; + using ViewType = Containers::VectorView< double, Devices::Cuda, int >; + + + //using VectorType = typename TestFixture::VectorType; + //using VectorOperations = typename TestFixture::VectorOperations; + //using ViewType = typename TestFixture::ViewType; using RealType = typename VectorType::RealType; using DeviceType = typename VectorType::DeviceType; using IndexType = typename VectorType::IndexType; @@ -182,7 +191,7 @@ TYPED_TEST( VectorTest, prefixSum ) ! std::is_same< IndexType, short >::value && ! 
std::is_same< RealType, float >::value ) { - const IndexType size = 134217728+100; + const IndexType size = 15500; //134217728+100; VectorType v( size ); ViewType v_view( v ); @@ -190,8 +199,8 @@ TYPED_TEST( VectorTest, prefixSum ) HostVectorType host_v( size ), host_copy( size ); HostViewType host_v_view( host_v ); - v = 0; - host_v = 0; + v = 1; + host_v = 1; v.computePrefixSum(); host_v.computePrefixSum(); host_copy = v; @@ -206,6 +215,13 @@ TYPED_TEST( VectorTest, prefixSum ) for( IndexType i = 0; i < size; i ++ ) EXPECT_EQ( host_copy[ i ], host_v[ i ] ); + setOscilatingLinearSequence( v ); + setOscilatingLinearSequence( host_v ); + v.computeExclusivePrefixSum(); + host_v.computeExclusivePrefixSum(); + host_copy = v; + for( IndexType i = 0; i < size; i ++ ) + EXPECT_EQ( host_copy[ i ], host_v[ i ] ); setOscilatingConstantSequence( v, 1 ); setOscilatingConstantSequence( host_v, 1 ); @@ -214,8 +230,17 @@ TYPED_TEST( VectorTest, prefixSum ) host_copy = v; for( IndexType i = 0; i < size; i ++ ) EXPECT_EQ( host_copy[ i ], host_v[ i ] ); + + /*setOscilatingConstantSequence( v, 1 ); + setOscilatingConstantSequence( host_v, 1 ); + v_view.computeExclusivePrefixSum(); + host_v_view.computeExclusivePrefixSum(); + host_copy = v; + for( IndexType i = 0; i < size; i ++ ) + EXPECT_EQ( host_copy[ i ], host_v[ i ] ); + * */ } -}*/ +} TYPED_TEST( VectorTest, exclusivePrefixSum ) { @@ -358,8 +383,8 @@ TYPED_TEST( VectorTest, abs ) #include "../GtestMissingError.h" int main( int argc, char* argv[] ) { - //Test(); - //return 0; + Test(); + return 0; #ifdef HAVE_GTEST ::testing::InitGoogleTest( &argc, argv ); return RUN_ALL_TESTS(); -- GitLab From 8923e36a006d6fa30e1e795729b052fd6dc897e9 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 6 Jun 2019 15:22:36 +0200 Subject: [PATCH 54/93] Refactoring prefix sum. 
--- .../Algorithms/CommonVectorOperations.h | 35 +-- .../Algorithms/CommonVectorOperations.hpp | 58 +---- .../Algorithms/CudaPrefixSumKernel.h | 21 +- src/TNL/Containers/Algorithms/PrefixSum.h | 164 +++++-------- src/TNL/Containers/Algorithms/PrefixSum.hpp | 227 +++++++----------- src/TNL/Containers/Vector.h | 54 +---- src/TNL/Containers/Vector.hpp | 73 ++---- src/TNL/Containers/VectorView.h | 29 +-- src/TNL/Containers/VectorView_impl.h | 74 ++---- src/UnitTests/Containers/VectorTest-4.h | 32 +-- 10 files changed, 253 insertions(+), 514 deletions(-) diff --git a/src/TNL/Containers/Algorithms/CommonVectorOperations.h b/src/TNL/Containers/Algorithms/CommonVectorOperations.h index 42b67a981..199bc63c4 100644 --- a/src/TNL/Containers/Algorithms/CommonVectorOperations.h +++ b/src/TNL/Containers/Algorithms/CommonVectorOperations.h @@ -18,7 +18,7 @@ template< typename Device > struct CommonVectorOperations { using DeviceType = Device; - + template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorMax( const Vector& v ); @@ -70,28 +70,17 @@ struct CommonVectorOperations template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getScalarProduct( const Vector1& v1, const Vector2& v2 ); - template< typename Vector > - static void computePrefixSum( Vector& v, - const typename Vector::IndexType begin, - const typename Vector::IndexType end ); - - template< typename Vector > - static void computeExclusivePrefixSum( Vector& v, - const typename Vector::IndexType begin, - const typename Vector::IndexType end ); - - template< typename Vector, typename Flags > - static void computeSegmentedPrefixSum( Vector& v, - Flags& f, - const typename Vector::IndexType begin, - const typename Vector::IndexType end ); - - template< typename Vector, typename Flags > - static void computeExclusiveSegmentedPrefixSum( Vector& v, - Flags& f, - const typename Vector::IndexType begin, - const typename 
Vector::IndexType end ); - + template< Algorithms::PrefixSumType Type, + typename Vector > + static void prefixSum( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end ); + + template< Algorithms::PrefixSumType Type, typename Vector, typename Flags > + static void segmentedPrefixSum( Vector& v, + Flags& f, + const typename Vector::IndexType begin, + const typename Vector::IndexType end ); }; } // namespace Algorithms diff --git a/src/TNL/Containers/Algorithms/CommonVectorOperations.hpp b/src/TNL/Containers/Algorithms/CommonVectorOperations.hpp index 62630863b..a9e9161cf 100644 --- a/src/TNL/Containers/Algorithms/CommonVectorOperations.hpp +++ b/src/TNL/Containers/Algorithms/CommonVectorOperations.hpp @@ -372,12 +372,13 @@ getScalarProduct( const Vector1& v1, } template< typename Device > -template< typename Vector > +template< Algorithms::PrefixSumType Type, + typename Vector > void CommonVectorOperations< Device >:: -computePrefixSum( Vector& v, - typename Vector::IndexType begin, - typename Vector::IndexType end ) +prefixSum( Vector& v, + typename Vector::IndexType begin, + typename Vector::IndexType end ) { using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; @@ -385,16 +386,17 @@ computePrefixSum( Vector& v, auto reduction = [=] __cuda_callable__ ( RealType& a, const RealType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile RealType& a, volatile RealType& b ) { a += b; }; - PrefixSum< Device >::inclusive( v, begin, end, reduction, volatileReduction, ( RealType ) 0.0 ); + PrefixSum< Device, Type >::perform( v, begin, end, reduction, volatileReduction, ( RealType ) 0.0 ); } template< typename Device > - template< typename Vector > + template< Algorithms::PrefixSumType Type, typename Vector, typename Flags > void CommonVectorOperations< Device >:: -computeExclusivePrefixSum( Vector& v, - typename Vector::IndexType begin, - typename Vector::IndexType end ) 
+segmentedPrefixSum( Vector& v, + Flags& f, + typename Vector::IndexType begin, + typename Vector::IndexType end ) { using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; @@ -402,43 +404,7 @@ computeExclusivePrefixSum( Vector& v, auto reduction = [=] __cuda_callable__ ( RealType& a, const RealType& b ) { a += b; }; auto volatileReduction = [=] __cuda_callable__ ( volatile RealType& a, volatile RealType& b ) { a += b; }; - PrefixSum< Device >::exclusive( v, begin, end, reduction, volatileReduction, ( RealType ) 0.0 ); -} - -template< typename Device > - template< typename Vector, typename Flags > -void -CommonVectorOperations< Device >:: -computeSegmentedPrefixSum( Vector& v, - Flags& f, - typename Vector::IndexType begin, - typename Vector::IndexType end ) -{ - using RealType = typename Vector::RealType; - using IndexType = typename Vector::IndexType; - - auto reduction = [=] __cuda_callable__ ( RealType& a, const RealType& b ) { a += b; }; - auto volatileReduction = [=] __cuda_callable__ ( volatile RealType& a, volatile RealType& b ) { a += b; }; - - PrefixSum< Device >::inclusiveSegmented( v, f, begin, end, reduction, volatileReduction, ( RealType ) 0.0 ); -} - -template< typename Device > - template< typename Vector, typename Flags > -void -CommonVectorOperations< Device >:: -computeExclusiveSegmentedPrefixSum( Vector& v, - Flags& f, - typename Vector::IndexType begin, - typename Vector::IndexType end ) -{ - using RealType = typename Vector::RealType; - using IndexType = typename Vector::IndexType; - - auto reduction = [=] __cuda_callable__ ( RealType& a, const RealType& b ) { a += b; }; - auto volatileReduction = [=] __cuda_callable__ ( volatile RealType& a, volatile RealType& b ) { a += b; }; - - PrefixSum< Device >::exclusiveSegmented( v, f, begin, end, reduction, volatileReduction, ( RealType ) 0.0 ); + SegmentedPrefixSum< Device, Type >::perform( v, f, begin, end, reduction, volatileReduction, ( RealType ) 0.0 ); } diff 
--git a/src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h b/src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h index 966fe2d74..993280d19 100644 --- a/src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h +++ b/src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h @@ -1,5 +1,5 @@ /*************************************************************************** - cuda-prefix-sum.h - description + CudaPrefixSumKernel.h - description ------------------- begin : Jan 18, 2014 copyright : (C) 2014 by Tomas Oberhuber @@ -22,16 +22,6 @@ namespace TNL { namespace Containers { namespace Algorithms { -enum class PrefixSumType { - exclusive, - inclusive -}; - -enum class PrefixSumSegmentation { - nonsegmented, - segmented -}; - #ifdef HAVE_CUDA template< typename Real, @@ -62,7 +52,7 @@ cudaFirstPhaseBlockPrefixSum( const PrefixSumType prefixSumType, */ const Index blockOffset = blockIdx.x * elementsInBlock; Index idx = threadIdx.x; - if( prefixSumType == PrefixSumType::exclusive ) + if( prefixSumType == PrefixSumType::Exclusive ) { if( idx == 0 ) sharedData[ 0 ] = zero; @@ -155,7 +145,7 @@ cudaFirstPhaseBlockPrefixSum( const PrefixSumType prefixSumType, if( threadIdx.x == 0 ) { - if( prefixSumType == PrefixSumType::exclusive ) + if( prefixSumType == PrefixSumType::Exclusive ) { Real aux = zero; operation( aux, sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ] ); @@ -181,8 +171,6 @@ cudaSecondPhaseBlockPrefixSum( Operation operation, if( blockIdx.x > 0 ) { const Real shift = auxArray[ blockIdx.x - 1 ]; - //operation( gridShift, shift ); //auxArray[ blockIdx.x - 1 ] ); - const Index readOffset = blockIdx.x * elementsInBlock; Index readIdx = threadIdx.x; while( readIdx < elementsInBlock && readOffset + readIdx < size ) @@ -194,7 +182,6 @@ cudaSecondPhaseBlockPrefixSum( Operation operation, } template< PrefixSumType prefixSumType, - PrefixSumSegmentation segmentation, typename Real, typename Index > struct CudaPrefixSumKernelLauncher @@ -255,7 +242,7 @@ struct 
CudaPrefixSumKernelLauncher */ Real gridShift2 = zero; if( numberOfBlocks > 1 ) - cudaRecursivePrefixSum( PrefixSumType::inclusive, + cudaRecursivePrefixSum( PrefixSumType::Inclusive, operation, volatileOperation, zero, diff --git a/src/TNL/Containers/Algorithms/PrefixSum.h b/src/TNL/Containers/Algorithms/PrefixSum.h index 36f01fbb1..715d6f1b9 100644 --- a/src/TNL/Containers/Algorithms/PrefixSum.h +++ b/src/TNL/Containers/Algorithms/PrefixSum.h @@ -15,147 +15,91 @@ #include #include #include +#include namespace TNL { namespace Containers { namespace Algorithms { -template< typename Device > -class PrefixSum -{ -}; - -template<> -class PrefixSum< Devices::Host > -{ - public: - template< typename Vector, - typename PrefixSumOperation, - typename VolatilePrefixSumOperation > - static void - inclusive( Vector& v, - const typename Vector::IndexType begin, - const typename Vector::IndexType end, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, - const typename Vector::RealType& zero ); +template< typename Device, + PrefixSumType Type = PrefixSumType::Inclusive > +class PrefixSum {}; - template< typename Vector, - typename PrefixSumOperation, - typename VolatilePrefixSumOperation > - static void - exclusive( Vector& v, - const typename Vector::IndexType begin, - const typename Vector::IndexType end, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, - const typename Vector::RealType& zero ); +template< typename Device, + PrefixSumType Type = PrefixSumType::Inclusive > +class SegmentedPrefixSum {}; - template< typename Vector, - typename FlagsArray, - typename PrefixSumOperation, - typename VolatilePrefixSumOperation > - static void - inclusiveSegmented( Vector& v, - FlagsArray& f, - const typename Vector::IndexType begin, - const typename Vector::IndexType end, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, - const typename Vector::RealType& zero ); +template< PrefixSumType 
Type > +class PrefixSum< Devices::Host, Type > +{ + public: template< typename Vector, - typename FlagsArray, typename PrefixSumOperation, typename VolatilePrefixSumOperation > static void - exclusiveSegmented( Vector& v, - FlagsArray& f, - const typename Vector::IndexType begin, - const typename Vector::IndexType end, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, - const typename Vector::RealType& zero ); + perform( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const typename Vector::RealType& zero ); }; -template<> -class PrefixSum< Devices::Cuda > +template< PrefixSumType Type > +class PrefixSum< Devices::Cuda, Type > { public: template< typename Vector, typename PrefixSumOperation, typename VolatilePrefixSumOperation > static void - inclusive( Vector& v, - const typename Vector::IndexType begin, - const typename Vector::IndexType end, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, - const typename Vector::RealType& zero ); - - template< typename Vector, - typename PrefixSumOperation, - typename VolatilePrefixSumOperation > - static void - exclusive( Vector& v, - const typename Vector::IndexType begin, - const typename Vector::IndexType end, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, - const typename Vector::RealType& zero ); - - template< typename Vector, - typename FlagsArray, - typename PrefixSumOperation, - typename VolatilePrefixSumOperation > - static void - inclusiveSegmented( Vector& v, - FlagsArray& f, - const typename Vector::IndexType begin, - const typename Vector::IndexType end, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, - const typename Vector::RealType& zero ); + perform( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + 
PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const typename Vector::RealType& zero ); +}; +template< PrefixSumType Type > +class SegmentedPrefixSum< Devices::Host, Type > +{ + public: template< typename Vector, - typename FlagsArray, typename PrefixSumOperation, - typename VolatilePrefixSumOperation > + typename VolatilePrefixSumOperation, + typename Flags > static void - exclusiveSegmented( Vector& v, - FlagsArray& f, - const typename Vector::IndexType begin, - const typename Vector::IndexType end, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, - const typename Vector::RealType& zero ); + perform( Vector& v, + Flags& flags, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const typename Vector::RealType& zero ); }; -template<> -class PrefixSum< Devices::MIC > +template< PrefixSumType Type > +class SegmentedPrefixSum< Devices::Cuda, Type > { public: - template< typename Index, - typename Result, - typename PrefixSumOperation, - typename VolatilePrefixSumOperation > - static Result - inclusive( const Index size, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, - const Result& zero ); - - template< typename Index, - typename Result, + template< typename Vector, typename PrefixSumOperation, - typename VolatilePrefixSumOperation > - static Result - exclusive( const Index size, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, - const Result& zero ); + typename VolatilePrefixSumOperation, + typename Flags > + static void + perform( Vector& v, + Flags& flags, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const typename Vector::RealType& zero ); }; + + } // namespace Algorithms } // namespace Containers } 
// namespace TNL diff --git a/src/TNL/Containers/Algorithms/PrefixSum.hpp b/src/TNL/Containers/Algorithms/PrefixSum.hpp index 58906b20f..7b6ced47a 100644 --- a/src/TNL/Containers/Algorithms/PrefixSum.hpp +++ b/src/TNL/Containers/Algorithms/PrefixSum.hpp @@ -40,122 +40,59 @@ static constexpr int PrefixSum_minGpuDataSize = 256;//65536; //16384;//1024;//25 //// // PrefixSum on host +template< PrefixSumType Type > template< typename Vector, typename PrefixSumOperation, typename VolatilePrefixSumOperation > void -PrefixSum< Devices::Host >:: -inclusive( Vector& v, - const typename Vector::IndexType begin, - const typename Vector::IndexType end, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, - const typename Vector::RealType& zero ) +PrefixSum< Devices::Host, Type >:: +perform( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const typename Vector::RealType& zero ) { - using IndexType = typename Vector::IndexType; - - // TODO: parallelize with OpenMP - for( IndexType i = begin + 1; i < end; i++ ) - reduction( v[ i ], v[ i - 1 ] ); -} - -template< typename Vector, - typename PrefixSumOperation, - typename VolatilePrefixSumOperation > -void -PrefixSum< Devices::Host >:: -exclusive( Vector& v, - const typename Vector::IndexType begin, - const typename Vector::IndexType end, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, - const typename Vector::RealType& zero ) -{ - using IndexType = typename Vector::IndexType; using RealType = typename Vector::RealType; - - // TODO: parallelize with OpenMP - RealType aux( v[ begin ] ); - v[ begin ] = zero; - for( IndexType i = begin + 1; i < end; i++ ) - { - RealType x = v[ i ]; - v[ i ] = aux; - reduction( aux, x ); - } -} - -template< typename Vector, - typename FlagsArray, - typename PrefixSumOperation, - typename VolatilePrefixSumOperation > 
-void -PrefixSum< Devices::Host >:: -inclusiveSegmented( Vector& v, - FlagsArray& f, - const typename Vector::IndexType begin, - const typename Vector::IndexType end, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, - const typename Vector::RealType& zero ) -{ using IndexType = typename Vector::IndexType; // TODO: parallelize with OpenMP - for( IndexType i = begin + 1; i < end; i++ ) - if( ! f[ i ] ) + if( Type == PrefixSumType::Inclusive ) + for( IndexType i = begin + 1; i < end; i++ ) reduction( v[ i ], v[ i - 1 ] ); -} - -template< typename Vector, - typename FlagsArray, - typename PrefixSumOperation, - typename VolatilePrefixSumOperation > -void -PrefixSum< Devices::Host >:: -exclusiveSegmented( Vector& v, - FlagsArray& f, - const typename Vector::IndexType begin, - const typename Vector::IndexType end, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, - const typename Vector::RealType& zero ) -{ - using IndexType = typename Vector::IndexType; - using RealType = typename Vector::RealType; - - // TODO: parallelize with OpenMP - RealType aux( v[ begin ] ); - v[ begin ] = zero; - for( IndexType i = begin + 1; i < end; i++ ) + else // Exclusive prefix sum { - RealType x = v[ i ]; - if( f[ i ] ) - aux = zero; - v[ i ] = aux; - reduction( aux, x ); + RealType aux( v[ begin ] ); + v[ begin ] = zero; + for( IndexType i = begin + 1; i < end; i++ ) + { + RealType x = v[ i ]; + v[ i ] = aux; + reduction( aux, x ); + } } } //// // PrefixSum on CUDA device +template< PrefixSumType Type > template< typename Vector, typename PrefixSumOperation, typename VolatilePrefixSumOperation > void -PrefixSum< Devices::Cuda >:: -inclusive( Vector& v, - const typename Vector::IndexType begin, - const typename Vector::IndexType end, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatileReduction, - const typename Vector::RealType& zero ) +PrefixSum< Devices::Cuda, Type >:: +perform( Vector& v, + const 
typename Vector::IndexType begin, + const typename Vector::IndexType end, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatileReduction, + const typename Vector::RealType& zero ) { using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; using IndexType = typename Vector::IndexType; #ifdef HAVE_CUDA - CudaPrefixSumKernelLauncher< PrefixSumType::inclusive, PrefixSumSegmentation::nonsegmented, RealType, IndexType >::start( + CudaPrefixSumKernelLauncher< Type, RealType, IndexType >::start( ( IndexType ) ( end - begin ), ( IndexType ) 256, &v[ begin ], @@ -166,71 +103,83 @@ inclusive( Vector& v, #endif } -template< typename Vector, - typename PrefixSumOperation, - typename VolatilePrefixSumOperation > + +//// +// PrefixSum on host +template< PrefixSumType Type > + template< typename Vector, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation, + typename Flags > void -PrefixSum< Devices::Cuda >:: -exclusive( Vector& v, - const typename Vector::IndexType begin, - const typename Vector::IndexType end, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatileReduction, - const typename Vector::RealType& zero ) +SegmentedPrefixSum< Devices::Host, Type >:: +perform( Vector& v, + Flags& flags, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatilePrefixSum, + const typename Vector::RealType& zero ) { - using IndexType = typename Vector::IndexType; using RealType = typename Vector::RealType; + using IndexType = typename Vector::IndexType; + + // TODO: parallelize with OpenMP + if( Type == PrefixSumType::Inclusive ) + { + for( IndexType i = begin + 1; i < end; i++ ) + if( ! 
flags[ i ] ) + reduction( v[ i ], v[ i - 1 ] ); + } + else // Exclusive prefix sum + { + RealType aux( v[ begin ] ); + v[ begin ] = zero; + for( IndexType i = begin + 1; i < end; i++ ) + { + RealType x = v[ i ]; + if( flags[ i ] ) + aux = zero; + v[ i ] = aux; + reduction( aux, x ); + } + } +} + +//// +// PrefixSum on CUDA device +template< PrefixSumType Type > + template< typename Vector, + typename PrefixSumOperation, + typename VolatilePrefixSumOperation, + typename Flags > +void +SegmentedPrefixSum< Devices::Cuda, Type >:: +perform( Vector& v, + Flags& flags, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + PrefixSumOperation& reduction, + VolatilePrefixSumOperation& volatileReduction, + const typename Vector::RealType& zero ) +{ using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; using IndexType = typename Vector::IndexType; #ifdef HAVE_CUDA - CudaPrefixSumKernelLauncher< PrefixSumType::exclusive, PrefixSumSegmentation::nonsegmented, RealType, IndexType>::start( + throw 0; // NOT IMPLEMENTED YET + /*CudaPrefixSumKernelLauncher< Type, RealType, IndexType >::start( ( IndexType ) ( end - begin ), ( IndexType ) 256, &v[ begin ], &v[ begin ], reduction, volatileReduction, - zero ); + zero );*/ #endif } -template< typename Vector, - typename FlagsArray, - typename PrefixSumOperation, - typename VolatilePrefixSumOperation > -void -PrefixSum< Devices::Cuda >:: -inclusiveSegmented( Vector& v, - FlagsArray& f, - const typename Vector::IndexType begin, - const typename Vector::IndexType end, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, - const typename Vector::RealType& zero ) -{ - using IndexType = typename Vector::IndexType; - -} -template< typename Vector, - typename FlagsArray, - typename PrefixSumOperation, - typename VolatilePrefixSumOperation > -void -PrefixSum< Devices::Cuda >:: -exclusiveSegmented( Vector& v, - FlagsArray& f, - const typename 
Vector::IndexType begin, - const typename Vector::IndexType end, - PrefixSumOperation& reduction, - VolatilePrefixSumOperation& volatilePrefixSum, - const typename Vector::RealType& zero ) -{ - using IndexType = typename Vector::IndexType; - using RealType = typename Vector::RealType; - -} } // namespace Algorithms } // namespace Containers diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h index 2aa5e60ff..6588b132d 100644 --- a/src/TNL/Containers/Vector.h +++ b/src/TNL/Containers/Vector.h @@ -12,6 +12,7 @@ #include #include +#include namespace TNL { namespace Containers { @@ -362,15 +363,6 @@ public: const Scalar2 multiplicator2, const Scalar3 thisMultiplicator = 1.0 ); - /** - * \brief Returns specific sums of elements of this vector. - * - * Goes in order from the first element to the last one and for every element - * in this vector computes sum of all previous elements including the element. - * Therefore this method returns a new vector with the length of this vector. - */ - void computePrefixSum(); - /** * \brief Returns specific sums of elements of this vector. * @@ -382,41 +374,21 @@ public: * \param begin Index of the element in this vector which to begin with. * \param end Index of the element in this vector which to end with. */ - void computePrefixSum( const IndexType begin, const IndexType end ); - - /** - * \brief Returns specific sums of elements of this vector. - * - * Goes in order from the first element to the last one and for every element - * in this vector computes sum of all previous elements excluding the element. - * Therefore returns a new vector with the length of this vector. - */ - void computeExclusivePrefixSum(); - - /** - * \brief Returns specific sums of elements of this vector. - * - * Does the same as \ref computeExclusivePrefixSum, but computes only sums for elements - * with the index in range from \e begin to \e end. The other elements of this - * vector remain untouched - with the same value. 
Therefore this method returns - * a new vector with the length of this vector. - * - * \param begin Index of the element in this vector which to begin with. - * \param end Index of the element in this vector which to end with. - */ - void computeExclusivePrefixSum( const IndexType begin, const IndexType end ); - - template< typename FlagsArray > - void computeSegmentedPrefixSum( FlagsArray& flags ); + template< Algorithms::PrefixSumType Type = Algorithms::PrefixSumType::Inclusive > + void prefixSum( const IndexType begin = - 1, const IndexType end = -1 ); - template< typename FlagsArray > - void computeSegmentedPrefixSum( FlagsArray& flags, const IndexType begin, const IndexType end ); + template< Algorithms::PrefixSumType Type = Algorithms::PrefixSumType::Inclusive, + typename FlagsArray > + void segmentedPrefixSum( FlagsArray& flags, const IndexType begin = -1, const IndexType end = -1 ); - template< typename FlagsArray > - void computeSegmentedExclusivePrefixSum( FlagsArray& flags ); + template< Algorithms::PrefixSumType Type = Algorithms::PrefixSumType::Inclusive, + typename VectorExpression > + void prefixSum( const VectorExpression& expression, const IndexType begin = - 1, const IndexType end = -1 ); - template< typename FlagsArray > - void computeSegmentedExclusivePrefixSum( FlagsArray& flags, const IndexType begin, const IndexType end ); + template< Algorithms::PrefixSumType Type = Algorithms::PrefixSumType::Inclusive, + typename VectorExpression, + typename FlagsArray > + void segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, const IndexType begin = -1, const IndexType end = -1 ); }; } // namespace Containers diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index a8a6edc1e..1cc7e89bf 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -481,87 +481,56 @@ addVectors( const Vector1& v1, template< typename Real, typename Device, typename Index > -void Vector< Real, Device, Index 
>::computePrefixSum() -{ - Algorithms::VectorOperations< Device >::computePrefixSum( *this, 0, this->getSize() ); -} - -template< typename Real, - typename Device, - typename Index > + template< Algorithms::PrefixSumType Type > void Vector< Real, Device, Index >:: -computePrefixSum( const IndexType begin, - const IndexType end ) -{ - Algorithms::VectorOperations< Device >::computePrefixSum( *this, begin, end ); -} - -template< typename Real, - typename Device, - typename Index > -void Vector< Real, Device, Index >::computeExclusivePrefixSum() +prefixSum( const IndexType begin, const IndexType end ) { - Algorithms::VectorOperations< Device >::computeExclusivePrefixSum( *this, 0, this->getSize() ); + if( begin == -1 && end == -1 ) + Algorithms::VectorOperations< Device >::template prefixSum< Type >( *this, 0, this->getSize() ); + else + Algorithms::VectorOperations< Device >::template prefixSum< Type >( *this, begin, end ); } template< typename Real, typename Device, typename Index > + template< Algorithms::PrefixSumType Type, + typename FlagsArray > void Vector< Real, Device, Index >:: -computeExclusivePrefixSum( const IndexType begin, - const IndexType end ) +segmentedPrefixSum( FlagsArray& flags, const IndexType begin, const IndexType end ) { - Algorithms::VectorOperations< Device >::computeExclusivePrefixSum( *this, begin, end ); -} + if( begin == -1 && end == -1 ) + Algorithms::VectorOperations< Device >::template segmentedPrefixSum< Type >( *this, flags, 0, this->getSize() ); + else + Algorithms::VectorOperations< Device >::template SegmentedPrefixSum< Type >( *this, flags, begin, end ); -template< typename Real, - typename Device, - typename Index > - template< typename FlagsArray > -void -Vector< Real, Device, Index >:: -computeSegmentedPrefixSum( FlagsArray& flags ) -{ - Algorithms::VectorOperations< Device >::computeSegmentedPrefixSum( *this, flags, 0, this->getSize() ); } template< typename Real, typename Device, typename Index > - template< typename 
FlagsArray > + template< Algorithms::PrefixSumType Type, + typename VectorExpression > void Vector< Real, Device, Index >:: -computeSegmentedPrefixSum( FlagsArray& flags, - const IndexType begin, - const IndexType end ) +prefixSum( const VectorExpression& expression, const IndexType begin, const IndexType end ) { - Algorithms::VectorOperations< Device >::computeSegmentedPrefixSum( *this, flags, begin, end ); -} -template< typename Real, - typename Device, - typename Index > - template< typename FlagsArray > -void -Vector< Real, Device, Index >:: -computeSegmentedExclusivePrefixSum( FlagsArray& flags ) -{ - Algorithms::VectorOperations< Device >::computeSegmentedExclusivePrefixSum( *this, flags, 0, this->getSize() ); } template< typename Real, typename Device, typename Index > - template< typename FlagsArray > + template< Algorithms::PrefixSumType Type, + typename VectorExpression, + typename FlagsArray > void Vector< Real, Device, Index >:: -computeSegmentedExclusivePrefixSum( FlagsArray& flags, - const IndexType begin, - const IndexType end ) +segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, const IndexType begin, const IndexType end ) { - Algorithms::VectorOperations< Device >::computeSegmentedExclusivePrefixSum( *this, flags, begin, end ); + } } // namespace Containers diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index 46a10c254..9c6e0956d 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -14,6 +14,7 @@ #include #include +#include namespace TNL { namespace Containers { @@ -188,25 +189,21 @@ public: Scalar2 multiplicator2, Scalar3 thisMultiplicator = 1.0 ); - void computePrefixSum(); + template< Algorithms::PrefixSumType Type = Algorithms::PrefixSumType::Inclusive > + void prefixSum( const IndexType begin = - 1, const IndexType end = -1 ); - void computePrefixSum( IndexType begin, IndexType end ); + template< Algorithms::PrefixSumType Type = 
Algorithms::PrefixSumType::Inclusive, + typename FlagsArray > + void segmentedPrefixSum( FlagsArray& flags, const IndexType begin = -1, const IndexType end = -1 ); - void computeExclusivePrefixSum(); + template< Algorithms::PrefixSumType Type = Algorithms::PrefixSumType::Inclusive, + typename VectorExpression > + void prefixSum( const VectorExpression& expression, const IndexType begin = - 1, const IndexType end = -1 ); - void computeExclusivePrefixSum( IndexType begin, IndexType end ); - - template< typename FlagsArray > - void computeSegmentedPrefixSum( FlagsArray& flags ); - - template< typename FlagsArray > - void computeSegmentedPrefixSum( FlagsArray& flags, const IndexType begin, const IndexType end ); - - template< typename FlagsArray > - void computeSegmentedExclusivePrefixSum( FlagsArray& flags ); - - template< typename FlagsArray > - void computeSegmentedExclusivePrefixSum( FlagsArray& flags, const IndexType begin, const IndexType end ); + template< Algorithms::PrefixSumType Type = Algorithms::PrefixSumType::Inclusive, + typename VectorExpression, + typename FlagsArray > + void segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, const IndexType begin = -1, const IndexType end = -1 ); }; } // namespace Containers diff --git a/src/TNL/Containers/VectorView_impl.h b/src/TNL/Containers/VectorView_impl.h index 37be6d048..3cb84e03d 100644 --- a/src/TNL/Containers/VectorView_impl.h +++ b/src/TNL/Containers/VectorView_impl.h @@ -396,89 +396,55 @@ addVectors( const Vector1& v1, template< typename Real, typename Device, typename Index > + template< Algorithms::PrefixSumType Type > void VectorView< Real, Device, Index >:: -computePrefixSum() +prefixSum( const IndexType begin, const IndexType end ) { - Algorithms::VectorOperations< Device >::computePrefixSum( *this, 0, this->getSize() ); + if( begin == -1 && end == -1 ) + Algorithms::VectorOperations< Device >::template prefixSum< Type >( *this, 0, this->getSize() ); + else + 
Algorithms::VectorOperations< Device >::template prefixSum< Type >( *this, begin, end ); } template< typename Real, typename Device, typename Index > + template< Algorithms::PrefixSumType Type, + typename FlagsArray > void VectorView< Real, Device, Index >:: -computePrefixSum( IndexType begin, IndexType end ) +segmentedPrefixSum( FlagsArray& flags, const IndexType begin, const IndexType end ) { - Algorithms::VectorOperations< Device >::computePrefixSum( *this, begin, end ); + if( begin == -1 && end == -1 ) + Algorithms::VectorOperations< Device >::template segmentedPrefixSum< Type >( *this, flags, 0, this->getSize() ); + else + Algorithms::VectorOperations< Device >::template segmentedPrefixSum< Type >( *this, flags, begin, end ); } template< typename Real, typename Device, typename Index > + template< Algorithms::PrefixSumType Type, + typename VectorExpression > void VectorView< Real, Device, Index >:: -computeExclusivePrefixSum() +prefixSum( const VectorExpression& expression, const IndexType begin, const IndexType end ) { - Algorithms::VectorOperations< Device >::computeExclusivePrefixSum( *this, 0, this->getSize() ); -} - -template< typename Real, - typename Device, - typename Index > -void -VectorView< Real, Device, Index >:: -computeExclusivePrefixSum( IndexType begin, IndexType end ) -{ - Algorithms::VectorOperations< Device >::computeExclusivePrefixSum( *this, begin, end ); -} - -template< typename Real, - typename Device, - typename Index > - template< typename FlagsArray > -void -VectorView< Real, Device, Index >:: -computeSegmentedPrefixSum( FlagsArray& flags ) -{ - Algorithms::VectorOperations< Device >::computeSegmentedPrefixSum( *this, flags, 0, this->getSize() ); -} -template< typename Real, - typename Device, - typename Index > - template< typename FlagsArray > -void -VectorView< Real, Device, Index >:: -computeSegmentedPrefixSum( FlagsArray& flags, - const IndexType begin, - const IndexType end ) -{ - Algorithms::VectorOperations< Device 
>::computeSegmentedPrefixSum( *this, flags, begin, end ); } template< typename Real, typename Device, typename Index > - template< typename FlagsArray > + template< Algorithms::PrefixSumType Type, + typename VectorExpression, + typename FlagsArray > void VectorView< Real, Device, Index >:: -computeSegmentedExclusivePrefixSum( FlagsArray& flags ) +segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, const IndexType begin, const IndexType end ) { - Algorithms::VectorOperations< Device >::computeSegmentedExclusivePrefixSum( *this, flags, 0, this->getSize() ); -} -template< typename Real, - typename Device, - typename Index > - template< typename FlagsArray > -void -VectorView< Real, Device, Index >:: -computeSegmentedExclusivePrefixSum( FlagsArray& flags, - const IndexType begin, - const IndexType end ) -{ - Algorithms::VectorOperations< Device >::computeSegmentedExclusivePrefixSum( *this, flags, begin, end ); } } // namespace Containers diff --git a/src/UnitTests/Containers/VectorTest-4.h b/src/UnitTests/Containers/VectorTest-4.h index 825409912..4d332711f 100644 --- a/src/UnitTests/Containers/VectorTest-4.h +++ b/src/UnitTests/Containers/VectorTest-4.h @@ -201,40 +201,40 @@ void Test() v = 1; host_v = 1; - v.computePrefixSum(); - host_v.computePrefixSum(); + v.prefixSum(); + host_v.prefixSum(); host_copy = v; for( IndexType i = 0; i < size; i ++ ) EXPECT_EQ( host_copy[ i ], host_v[ i ] ); setOscilatingLinearSequence( v ); setOscilatingLinearSequence( host_v ); - v.computePrefixSum(); - host_v.computePrefixSum(); + v.prefixSum(); + host_v.prefixSum(); host_copy = v; for( IndexType i = 0; i < size; i ++ ) EXPECT_EQ( host_copy[ i ], host_v[ i ] ); setOscilatingLinearSequence( v ); setOscilatingLinearSequence( host_v ); - v.computeExclusivePrefixSum(); - host_v.computeExclusivePrefixSum(); + v.prefixSum< Algorithms::PrefixSumType::Exclusive >(); + host_v.prefixSum< Algorithms::PrefixSumType::Exclusive >(); host_copy = v; for( IndexType i = 0; i < 
size; i ++ ) EXPECT_EQ( host_copy[ i ], host_v[ i ] ); setOscilatingConstantSequence( v, 1 ); setOscilatingConstantSequence( host_v, 1 ); - v_view.computePrefixSum(); - host_v_view.computePrefixSum(); + v_view.prefixSum< Algorithms::PrefixSumType::Inclusive >(); + host_v_view.prefixSum< Algorithms::PrefixSumType::Inclusive >(); host_copy = v; for( IndexType i = 0; i < size; i ++ ) EXPECT_EQ( host_copy[ i ], host_v[ i ] ); /*setOscilatingConstantSequence( v, 1 ); setOscilatingConstantSequence( host_v, 1 ); - v_view.computeExclusivePrefixSum(); - host_v_view.computeExclusivePrefixSum(); + v_view.prefixSum< Algorithms::PrefixSumType::Exclusive >(); + host_v_view.prefixSum< Algorithms::PrefixSumType::Exclusive >(); host_copy = v; for( IndexType i = 0; i < size; i ++ ) EXPECT_EQ( host_copy[ i ], host_v[ i ] ); @@ -254,32 +254,32 @@ TYPED_TEST( VectorTest, exclusivePrefixSum ) ViewType v_view( v ); setConstantSequence( v, 1 ); - v.computeExclusivePrefixSum(); + v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); for( int i = 0; i < size; i++ ) EXPECT_EQ( v.getElement( i ), i ); v.setValue( 0 ); - v.computeExclusivePrefixSum(); + v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); for( int i = 0; i < size; i++ ) EXPECT_EQ( v.getElement( i ), 0 ); setLinearSequence( v ); - v.computeExclusivePrefixSum(); + v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); for( int i = 1; i < size; i++ ) EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i - 1 ); setConstantSequence( v, 1 ); - v_view.computeExclusivePrefixSum(); + v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); for( int i = 0; i < size; i++ ) EXPECT_EQ( v.getElement( i ), i ); v.setValue( 0 ); - v_view.computeExclusivePrefixSum(); + v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); for( int i = 0; i < size; i++ ) EXPECT_EQ( v.getElement( i ), 0 ); setLinearSequence( v ); - v_view.computeExclusivePrefixSum(); + v_view.template prefixSum< 
Algorithms::PrefixSumType::Exclusive >(); for( int i = 1; i < size; i++ ) EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i - 1 ); } -- GitLab From 6f0209ca520e286ac06f3738205a9d987a9d7a67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 6 Jun 2019 20:52:27 +0200 Subject: [PATCH 55/93] Removing fetch from prefix sum which leads to templated recursive call and compiler crash. Since this is not important feature I do not implement it now. --- src/TNL/Containers/Algorithms/PrefixSumType.h | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 src/TNL/Containers/Algorithms/PrefixSumType.h diff --git a/src/TNL/Containers/Algorithms/PrefixSumType.h b/src/TNL/Containers/Algorithms/PrefixSumType.h new file mode 100644 index 000000000..ba03adfe6 --- /dev/null +++ b/src/TNL/Containers/Algorithms/PrefixSumType.h @@ -0,0 +1,24 @@ +/*************************************************************************** + PrefixSumType.h - description + ------------------- + begin : Jun 6, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { +namespace Containers { +namespace Algorithms { + +enum class PrefixSumType { + Exclusive, + Inclusive +}; + +} // namespace Algorithms +} // namespace Containers +} // namespace TNL -- GitLab From fc615e79701f8d3176e98f468cdc26183b366592 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 6 Jun 2019 21:21:24 +0200 Subject: [PATCH 56/93] Adding CUDA prefix sum unit test. 
--- .../Algorithms/CudaPrefixSumKernel.h | 4 +-- .../Containers/Algorithms/CMakeLists.txt | 6 ++++ .../Algorithms/CudaPrefixSumTest.cu | 32 +++++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 src/UnitTests/Containers/Algorithms/CMakeLists.txt create mode 100644 src/UnitTests/Containers/Algorithms/CudaPrefixSumTest.cu diff --git a/src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h b/src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h index 993280d19..8f3bd852b 100644 --- a/src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h +++ b/src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h @@ -274,7 +274,7 @@ struct CudaPrefixSumKernelLauncher /**** * \brief Starts prefix sum in CUDA. * - * \tparam Operation operation to be peformed on particular elements - addition usually + * \tparam Operation operation to be performed on particular elements - addition usually * \tparam VolatileOperation - volatile version of Operation * \param size is number of elements to be scanned * \param blockSize is CUDA block size @@ -282,7 +282,7 @@ struct CudaPrefixSumKernelLauncher * \param deviceOutput is pointer to resulting array, can be the same as input * \param operation is instance of Operation * \param volatileOperation is instance of VolatileOperation - * \param zero is neutral element for fiven Operation + * \param zero is neutral element for given Operation */ template< typename Operation, typename VolatileOperation > diff --git a/src/UnitTests/Containers/Algorithms/CMakeLists.txt b/src/UnitTests/Containers/Algorithms/CMakeLists.txt new file mode 100644 index 000000000..1a5033f45 --- /dev/null +++ b/src/UnitTests/Containers/Algorithms/CMakeLists.txt @@ -0,0 +1,6 @@ +IF( BUILD_CUDA ) + CUDA_ADD_EXECUTABLE( CudaPrefixSumTest CudaPrefixSumTest.cu + OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( CudaPrefixSumTest ${GTEST_BOTH_LIBRARIES} ) + ADD_TEST( CudaPrefixSumTest ${EXECUTABLE_OUTPUT_PATH}/CudaPrefixSumTest${CMAKE_EXECUTABLE_SUFFIX} ) 
+ENDIF() diff --git a/src/UnitTests/Containers/Algorithms/CudaPrefixSumTest.cu b/src/UnitTests/Containers/Algorithms/CudaPrefixSumTest.cu new file mode 100644 index 000000000..f036904dd --- /dev/null +++ b/src/UnitTests/Containers/Algorithms/CudaPrefixSumTest.cu @@ -0,0 +1,32 @@ +/*************************************************************************** + CudaPrefixSumTest.cu - description + ------------------- + begin : Jun 6, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +///// +// NOTE: This test serves mainly for testing CUDA prefix sum when more than +// one CUDA grid is used. To avoid allocation of extremely large vectors and to +// speed-up this unit test, we decrease the grid size artificialy. + +#pragma once + +#ifdef HAVE_GTEST +#include + +#include +#include +#include +#include "VectorTestSetup.h" + +#include "gtest/gtest.h" + +using namespace TNL; +using namespace TNL::Containers; +using namespace TNL::Containers::Algorithms; +using namespace TNL::Arithmetics; + -- GitLab From 87528b48b0df4bb63006a4e13653247bf2cc982f Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Fri, 7 Jun 2019 14:56:02 +0200 Subject: [PATCH 57/93] Prefix sum revision is finished. Segmented prefix sum for CUDA is not implemented yet. 
--- .../Algorithms/CommonVectorOperations.h | 2 + .../Algorithms/CudaPrefixSumKernel.h | 36 +++- src/TNL/Matrices/BiEllpack_impl.h | 2 +- src/TNL/Matrices/CSR_impl.h | 2 +- src/TNL/Matrices/ChunkedEllpack_impl.h | 2 +- .../Matrices/SlicedEllpackSymmetric_impl.h | 2 +- src/TNL/Matrices/SlicedEllpack_impl.h | 2 +- .../Containers/Algorithms/CMakeLists.txt | 6 - .../Algorithms/CudaPrefixSumTest.cu | 32 --- src/UnitTests/Containers/VectorTest-4.h | 183 ++++++++++-------- 10 files changed, 140 insertions(+), 129 deletions(-) delete mode 100644 src/UnitTests/Containers/Algorithms/CMakeLists.txt delete mode 100644 src/UnitTests/Containers/Algorithms/CudaPrefixSumTest.cu diff --git a/src/TNL/Containers/Algorithms/CommonVectorOperations.h b/src/TNL/Containers/Algorithms/CommonVectorOperations.h index 199bc63c4..20d6ef16c 100644 --- a/src/TNL/Containers/Algorithms/CommonVectorOperations.h +++ b/src/TNL/Containers/Algorithms/CommonVectorOperations.h @@ -10,6 +10,8 @@ #pragma once +#include + namespace TNL { namespace Containers { namespace Algorithms { diff --git a/src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h b/src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h index 8f3bd852b..40a662b08 100644 --- a/src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h +++ b/src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h @@ -300,7 +300,7 @@ struct CudaPrefixSumKernelLauncher */ const Index elementsInBlock = 8 * blockSize; const Index numberOfBlocks = roundUpDivision( size, elementsInBlock ); - const auto maxGridSize = 3; //Devices::Cuda::getMaxGridSize(); + //const auto maxGridSize = 3; //Devices::Cuda::getMaxGridSize(); const Index numberOfGrids = Devices::Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize ); Real gridShift = zero; //std::cerr << "numberOfgrids = " << numberOfGrids << std::endl; @@ -331,8 +331,42 @@ struct CudaPrefixSumKernelLauncher &deviceOutput[ gridOffset ] ); TNL_CHECK_CUDA_DEVICE; } + + /*** + * Store the number of CUDA grids for a purpose of unit testing, 
i.e. + * to check if we test the algorithm with more than one CUDA grid. + */ + gridsCount = numberOfGrids; } + + /**** + * The following serves for setting smaller maxGridSize so that we can force + * the prefix sum in CUDA to run with more than one grid in unit tests. + */ + static void setMaxGridSize( int newMaxGridSize ) { + maxGridSize = newMaxGridSize; + } + + static void resetMaxGridSize() { + maxGridSize = Devices::Cuda::getMaxGridSize(); + } + + static int maxGridSize; + + static int gridsCount; }; + +template< PrefixSumType prefixSumType, + typename Real, + typename Index > +int CudaPrefixSumKernelLauncher< prefixSumType, Real, Index >::maxGridSize = Devices::Cuda::getMaxGridSize(); + +template< PrefixSumType prefixSumType, + typename Real, + typename Index > +int CudaPrefixSumKernelLauncher< prefixSumType, Real, Index >::gridsCount = -1; + + #endif } // namespace Algorithms diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/BiEllpack_impl.h index 8608a0d99..dc520cc91 100644 --- a/src/TNL/Matrices/BiEllpack_impl.h +++ b/src/TNL/Matrices/BiEllpack_impl.h @@ -116,7 +116,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) //DeviceDependentCode::performRowBubbleSort( *this, rowLengths ); //DeviceDependentCode::computeColumnSizes( *this, rowLengths ); - this->groupPointers.computeExclusivePrefixSum(); + this->groupPointers.template prefixSum< Containers::Algorithms::PrefixSumType::Exclusive >(); // uncomment to perform structure test //DeviceDependentCode::verifyRowPerm( *this, rowLengths ); diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h index 3af550a7a..d4794d52e 100644 --- a/src/TNL/Matrices/CSR_impl.h +++ b/src/TNL/Matrices/CSR_impl.h @@ -104,7 +104,7 @@ void CSR< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLeng rowPtrs.bind( this->rowPointers.getData(), this->getRows() ); rowPtrs = rowLengths; this->rowPointers.setElement( this->rows, 0 ); -
this->rowPointers.computeExclusivePrefixSum(); + this->rowPointers.template prefixSum< Containers::Algorithms::PrefixSumType::Exclusive >(); this->maxRowLength = rowLengths.max(); /**** diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/ChunkedEllpack_impl.h index a1aae9d97..e1a0868db 100644 --- a/src/TNL/Matrices/ChunkedEllpack_impl.h +++ b/src/TNL/Matrices/ChunkedEllpack_impl.h @@ -248,7 +248,7 @@ void ChunkedEllpack< Real, Device, Index >::setCompressedRowLengths( ConstCompre this->rowPointers.setElement( 0, 0 ); for( IndexType sliceIndex = 0; sliceIndex < numberOfSlices; sliceIndex++ ) this->setSlice( rowLengths, sliceIndex, elementsToAllocation ); - this->rowPointers.computePrefixSum(); + this->rowPointers.prefixSum(); } // std::cout << "\ngetRowLength after first if: " << std::endl; diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h index 7089a45ad..86f85e211 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h @@ -80,7 +80,7 @@ void SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::setCompressedRowL this->maxRowLength = rowLengths.max(); - this->slicePointers.computeExclusivePrefixSum(); + this->slicePointers.template prefixSum< Containers::Algorithms::PrefixSumType::Exclusive >(); this->allocateMatrixElements( this->slicePointers.getElement( slices ) ); } diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/SlicedEllpack_impl.h index d93984041..15240b137 100644 --- a/src/TNL/Matrices/SlicedEllpack_impl.h +++ b/src/TNL/Matrices/SlicedEllpack_impl.h @@ -97,7 +97,7 @@ void SlicedEllpack< Real, Device, Index, SliceSize >::setCompressedRowLengths( C this->maxRowLength = rowLengths.max(); - this->slicePointers.computeExclusivePrefixSum(); + this->slicePointers.template prefixSum< Containers::Algorithms::PrefixSumType::Exclusive >(); this->allocateMatrixElements( this->slicePointers.getElement( slices ) 
); } diff --git a/src/UnitTests/Containers/Algorithms/CMakeLists.txt b/src/UnitTests/Containers/Algorithms/CMakeLists.txt deleted file mode 100644 index 1a5033f45..000000000 --- a/src/UnitTests/Containers/Algorithms/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE( CudaPrefixSumTest CudaPrefixSumTest.cu - OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( CudaPrefixSumTest ${GTEST_BOTH_LIBRARIES} ) - ADD_TEST( CudaPrefixSumTest ${EXECUTABLE_OUTPUT_PATH}/CudaPrefixSumTest${CMAKE_EXECUTABLE_SUFFIX} ) -ENDIF() diff --git a/src/UnitTests/Containers/Algorithms/CudaPrefixSumTest.cu b/src/UnitTests/Containers/Algorithms/CudaPrefixSumTest.cu deleted file mode 100644 index f036904dd..000000000 --- a/src/UnitTests/Containers/Algorithms/CudaPrefixSumTest.cu +++ /dev/null @@ -1,32 +0,0 @@ -/*************************************************************************** - CudaPrefixSumTest.cu - description - ------------------- - begin : Jun 6, 2019 - copyright : (C) 2019 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -///// -// NOTE: This test serves mainly for testing CUDA prefix sum when more than -// one CUDA grid is used. To avoid allocation of extremely large vectors and to -// speed-up this unit test, we decrease the grid size artificialy. 
- -#pragma once - -#ifdef HAVE_GTEST -#include - -#include -#include -#include -#include "VectorTestSetup.h" - -#include "gtest/gtest.h" - -using namespace TNL; -using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; -using namespace TNL::Arithmetics; - diff --git a/src/UnitTests/Containers/VectorTest-4.h b/src/UnitTests/Containers/VectorTest-4.h index 4d332711f..2eb5210d7 100644 --- a/src/UnitTests/Containers/VectorTest-4.h +++ b/src/UnitTests/Containers/VectorTest-4.h @@ -27,11 +27,10 @@ using namespace TNL::Containers; using namespace TNL::Containers::Algorithms; using namespace TNL::Arithmetics; -// should be small enough to have fast tests, but larger than minGPUReductionDataSize -// and large enough to require multiple CUDA blocks for reduction -constexpr int VECTOR_TEST_SIZE = 5000; +// Should be small enough to have fast tests, but larger than minGPUReductionDataSize +// and large enough to require multiple CUDA blocks for reduction. +constexpr int VECTOR_TEST_SIZE = 10000; -/* TYPED_TEST( VectorTest, addVector ) { using VectorType = typename TestFixture::VectorType; @@ -133,112 +132,75 @@ TYPED_TEST( VectorTest, prefixSum ) using IndexType = typename VectorType::IndexType; const int size = VECTOR_TEST_SIZE; + if( std::is_same< RealType, float >::value || + std::is_same< IndexType, short >::value ) + return; + VectorType v( size ); ViewType v_view( v ); v = 0; - v.computePrefixSum(); + v.prefixSum(); for( int i = 0; i < size; i++ ) EXPECT_EQ( v.getElement( i ), 0 ); setLinearSequence( v ); - v.computePrefixSum(); + v.prefixSum(); for( int i = 1; i < size; i++ ) EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i ); setConstantSequence( v, 1 ); - v_view.computePrefixSum(); + v_view.prefixSum(); for( int i = 0; i < size; i++ ) EXPECT_EQ( v.getElement( i ), i + 1 ); v = 0; - v_view.computePrefixSum(); + v_view.prefixSum(); for( int i = 0; i < size; i++ ) EXPECT_EQ( v.getElement( i ), 0 ); setLinearSequence( v ); - 
v_view.computePrefixSum(); + v_view.prefixSum(); for( int i = 1; i < size; i++ ) EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i ); -} - - */ -/*** - * The following test tekaes too long - 6 min approx. - */ -//TYPED_TEST( VectorTest, longPrefixSum ) -void Test() -{ - using VectorType = Containers::Vector< double, Devices::Cuda, int >; - using VectorOperations = Algorithms::VectorOperations< typename VectorType::DeviceType >; - using ViewType = Containers::VectorView< double, Devices::Cuda, int >; - - - //using VectorType = typename TestFixture::VectorType; - //using VectorOperations = typename TestFixture::VectorOperations; - //using ViewType = typename TestFixture::ViewType; - using RealType = typename VectorType::RealType; - using DeviceType = typename VectorType::DeviceType; - using IndexType = typename VectorType::IndexType; - using HostVectorType = Vector< RealType, Devices::Host, IndexType >; - using HostViewType = VectorView< RealType, Devices::Host, IndexType >; - - ///// - // This is test of prefix sum on long vectors to check of the correction - // across multiple CUDA grids is correct - if( std::is_same< DeviceType, Devices::Cuda >::value && - ! std::is_same< IndexType, short >::value && - ! std::is_same< RealType, float >::value ) + //// + // With CUDA, perform tests with multiple CUDA grids. 
+ if( std::is_same< DeviceType, Devices::Cuda >::value ) { - const IndexType size = 15500; //134217728+100; - - VectorType v( size ); - ViewType v_view( v ); - - HostVectorType host_v( size ), host_copy( size ); - HostViewType host_v_view( host_v ); - - v = 1; - host_v = 1; +#ifdef HAVE_CUDA + Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::setMaxGridSize( 3 ); + v = 0; v.prefixSum(); - host_v.prefixSum(); - host_copy = v; - for( IndexType i = 0; i < size; i ++ ) - EXPECT_EQ( host_copy[ i ], host_v[ i ] ); + EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1 ); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), 0 ); - setOscilatingLinearSequence( v ); - setOscilatingLinearSequence( host_v ); + setLinearSequence( v ); v.prefixSum(); - host_v.prefixSum(); - host_copy = v; - for( IndexType i = 0; i < size; i ++ ) - EXPECT_EQ( host_copy[ i ], host_v[ i ] ); - - setOscilatingLinearSequence( v ); - setOscilatingLinearSequence( host_v ); - v.prefixSum< Algorithms::PrefixSumType::Exclusive >(); - host_v.prefixSum< Algorithms::PrefixSumType::Exclusive >(); - host_copy = v; - for( IndexType i = 0; i < size; i ++ ) - EXPECT_EQ( host_copy[ i ], host_v[ i ] ); - - setOscilatingConstantSequence( v, 1 ); - setOscilatingConstantSequence( host_v, 1 ); - v_view.prefixSum< Algorithms::PrefixSumType::Inclusive >(); - host_v_view.prefixSum< Algorithms::PrefixSumType::Inclusive >(); - host_copy = v; - for( IndexType i = 0; i < size; i ++ ) - EXPECT_EQ( host_copy[ i ], host_v[ i ] ); - - /*setOscilatingConstantSequence( v, 1 ); - setOscilatingConstantSequence( host_v, 1 ); - v_view.prefixSum< Algorithms::PrefixSumType::Exclusive >(); - host_v_view.prefixSum< Algorithms::PrefixSumType::Exclusive >(); - host_copy = v; - for( IndexType i = 0; i < size; i ++ ) - EXPECT_EQ( host_copy[ i ], host_v[ i ] ); - * */ + EXPECT_GT( ( CudaPrefixSumKernelLauncher< 
Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1 ); + for( int i = 1; i < size; i++ ) + EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i ); + + setConstantSequence( v, 1 ); + v_view.prefixSum(); + EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1 ); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), i + 1 ); + + v = 0; + v_view.prefixSum(); + EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1 ); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), 0 ); + + setLinearSequence( v ); + v_view.prefixSum(); + EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1 ); + for( int i = 1; i < size; i++ ) + EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i ); + CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::resetMaxGridSize(); +#endif } } @@ -247,8 +209,15 @@ TYPED_TEST( VectorTest, exclusivePrefixSum ) using VectorType = typename TestFixture::VectorType; using VectorOperations = typename TestFixture::VectorOperations; using ViewType = typename TestFixture::ViewType; + using RealType = typename VectorType::RealType; + using DeviceType = typename VectorType::DeviceType; + using IndexType = typename VectorType::IndexType; const int size = VECTOR_TEST_SIZE; + if( std::is_same< RealType, float >::value || + std::is_same< IndexType, short >::value ) + return; + VectorType v; v.setSize( size ); ViewType v_view( v ); @@ -282,6 +251,52 @@ TYPED_TEST( VectorTest, exclusivePrefixSum ) v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); for( int i = 1; i < size; i++ ) EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i - 1 ); + + //// + // With CUDA, perform tests with multiple CUDA grids. 
+ if( std::is_same< DeviceType, Devices::Cuda >::value ) + { +#ifdef HAVE_CUDA + CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::setMaxGridSize( 3 ); + + setConstantSequence( v, 1 ); + v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); + EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 ); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), i ); + + v.setValue( 0 ); + v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); + EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 ); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), 0 ); + + setLinearSequence( v ); + v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); + EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 ); + for( int i = 1; i < size; i++ ) + EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i - 1 ); + + setConstantSequence( v, 1 ); + v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); + EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 ); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), i ); + + v.setValue( 0 ); + v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); + EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 ); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( v.getElement( i ), 0 ); + + setLinearSequence( v ); + v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); + EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 ); + for( int i = 1; i < size; i++ ) + EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i - 1 ); + 
CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::resetMaxGridSize(); +#endif + } } @@ -383,8 +398,6 @@ TYPED_TEST( VectorTest, abs ) #include "../GtestMissingError.h" int main( int argc, char* argv[] ) { - Test(); - return 0; #ifdef HAVE_GTEST ::testing::InitGoogleTest( &argc, argv ); return RUN_ALL_TESTS(); -- GitLab From 7d389df70a3fce417782c95dd9bfe2ef28aee131 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Sun, 9 Jun 2019 00:03:18 +0200 Subject: [PATCH 58/93] Fixing ArrayTest. --- src/UnitTests/Containers/ArrayTest.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h index 65b8c8b1d..777f565e6 100644 --- a/src/UnitTests/Containers/ArrayTest.h +++ b/src/UnitTests/Containers/ArrayTest.h @@ -151,6 +151,7 @@ TYPED_TEST( ArrayTest, constructors ) ArrayType v( 10 ); EXPECT_EQ( v.getSize(), 10 ); + v = 0; // deep copy ArrayType w( v ); -- GitLab From 0b6c424dec153b58953cec03a76a8ed0e356e9cb Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Sun, 9 Jun 2019 00:03:54 +0200 Subject: [PATCH 59/93] Creating script for fetching example outputs. --- Documentation/Doxyfile | 3 ++- Documentation/fetch-outputs | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100755 Documentation/fetch-outputs diff --git a/Documentation/Doxyfile b/Documentation/Doxyfile index 39df663bf..e603b7dc0 100644 --- a/Documentation/Doxyfile +++ b/Documentation/Doxyfile @@ -914,7 +914,8 @@ EXCLUDE_SYMBOLS += TNL::Assert::* # internal namespace # that contain example code fragments that are included (see the \include # command). 
-EXAMPLE_PATH = ../src/Examples +EXAMPLE_PATH = ../src/Examples \ + Outputs # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and diff --git a/Documentation/fetch-outputs b/Documentation/fetch-outputs new file mode 100755 index 000000000..bb1a669b5 --- /dev/null +++ b/Documentation/fetch-outputs @@ -0,0 +1,12 @@ +#!/bin/bash + +OUTPUTS_PATH=../Release/src/Examples + +DOC_ROOT_DIR=`pwd` + +echo $DOC_ROOT_DIR + +mkdir Outputs + +find $OUTPUTS_PATH -name "*.out" -exec echo '{}' Outputs/'{}' \; + -- GitLab From a4a31325c3a400c541baa8546a23a32de91ae3f7 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Mon, 10 Jun 2019 16:07:04 +0200 Subject: [PATCH 60/93] Writing array tutorial. --- Documentation/CMakeLists.txt | 25 +++++---- Documentation/{Tutorials => }/Doxyfile.in | 15 +++--- .../Tutorials/Arrays/ArrayAllocation.cpp | 46 ++++++++++++++++ .../Tutorials/Arrays/ArrayAllocation.cu | 1 + .../Tutorials/Arrays/ArrayBinding-1.cpp | 32 +++++++++++ .../Tutorials/Arrays/ArrayBinding-1.cu | 1 + .../Tutorials/Arrays/ArrayBinding-2.cpp | 44 +++++++++++++++ .../Tutorials/Arrays/ArrayBinding-2.cu | 1 + .../Tutorials/Arrays/ArrayBinding-3.cpp | 33 ++++++++++++ .../Tutorials/Arrays/ArrayBinding-3.cu | 1 + Documentation/Tutorials/Arrays/CMakeLists.txt | 34 ++++++++++++ .../Arrays/Doxyfile.in} | 12 ++--- Documentation/Tutorials/Arrays/main-page.md | 53 +++++++++++++++++++ Documentation/Tutorials/CMakeLists.txt | 17 +----- Documentation/Tutorials/main-page.md | 1 - Documentation/Tutorials/tutorial-1.cpp | 17 ------ Documentation/main-page.md | 4 +- build | 4 ++ 18 files changed, 279 insertions(+), 62 deletions(-) rename Documentation/{Tutorials => }/Doxyfile.in (99%) create mode 100644 Documentation/Tutorials/Arrays/ArrayAllocation.cpp create mode 120000 Documentation/Tutorials/Arrays/ArrayAllocation.cu create mode 100644 Documentation/Tutorials/Arrays/ArrayBinding-1.cpp create mode 
120000 Documentation/Tutorials/Arrays/ArrayBinding-1.cu create mode 100644 Documentation/Tutorials/Arrays/ArrayBinding-2.cpp create mode 120000 Documentation/Tutorials/Arrays/ArrayBinding-2.cu create mode 100644 Documentation/Tutorials/Arrays/ArrayBinding-3.cpp create mode 120000 Documentation/Tutorials/Arrays/ArrayBinding-3.cu create mode 100644 Documentation/Tutorials/Arrays/CMakeLists.txt rename Documentation/{Doxyfile => Tutorials/Arrays/Doxyfile.in} (99%) create mode 100644 Documentation/Tutorials/Arrays/main-page.md delete mode 100644 Documentation/Tutorials/main-page.md delete mode 100644 Documentation/Tutorials/tutorial-1.cpp diff --git a/Documentation/CMakeLists.txt b/Documentation/CMakeLists.txt index 45f0c0eca..4aec23f9a 100644 --- a/Documentation/CMakeLists.txt +++ b/Documentation/CMakeLists.txt @@ -1,18 +1,17 @@ add_subdirectory( Tutorials ) ## set input and output files -#set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in) -#set(DOXYGEN_OUT ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile) -# +set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in) +set(DOXYGEN_OUT ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile) + ## request to configure the file -#configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY) -# +configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY) + ## note the option ALL which allows to build the docs together with the application -#add_custom_target( doc_doxygen ALL -# COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_OUT} -# WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} -# COMMENT "Generating API documentation with Doxygen" -# VERBATIM ) -# -#INSTALL( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html DESTINATION ${CMAKE_INSTALL_PREFIX}/share/doc/tnl ) -# +add_custom_target( doc_doxygen ALL + COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_OUT} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Generating API documentation with Doxygen" + VERBATIM ) + +INSTALL( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html DESTINATION ${CMAKE_INSTALL_PREFIX}/share/doc/tnl ) diff --git 
a/Documentation/Tutorials/Doxyfile.in b/Documentation/Doxyfile.in similarity index 99% rename from Documentation/Tutorials/Doxyfile.in rename to Documentation/Doxyfile.in index 784e1a51c..0c47450ef 100644 --- a/Documentation/Tutorials/Doxyfile.in +++ b/Documentation/Doxyfile.in @@ -51,7 +51,7 @@ PROJECT_BRIEF = # pixels and the maximum width should not exceed 200 pixels. Doxygen will copy # the logo to the output directory. -PROJECT_LOGO = tnl-logo.jpg +PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is @@ -318,7 +318,7 @@ TOC_INCLUDE_HEADINGS = 0 # globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. -AUTOLINK_SUPPORT = YES +AUTOLINK_SUPPORT = NO # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this @@ -906,14 +906,16 @@ EXCLUDE_PATTERNS = */Debugging/* \ # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* -EXCLUDE_SYMBOLS = *::__*impl # internal namespaces +EXCLUDE_SYMBOLS = detail::*,*::detail::* # internal namespaces +EXCLUDE_SYMBOLS += *::__*impl # internal namespaces EXCLUDE_SYMBOLS += TNL::Assert::* # internal namespace # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). -EXAMPLE_PATH = ../src/Examples +EXAMPLE_PATH = @PROJECT_SOURCE_DIR@/src/Examples \ + @CMAKE_CURRENT_BINARY_DIR@/src/Examples # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and @@ -1209,7 +1211,7 @@ HTML_EXTRA_STYLESHEET = # files will be copied as-is; there are no commands or markers available. 
# This tag requires that the tag GENERATE_HTML is set to YES. -HTML_EXTRA_FILES = tnl-logo.jpg +HTML_EXTRA_FILES = @PROJECT_SOURCE_DIR@/Documentation/tnl-logo.jpg # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to @@ -2106,7 +2108,8 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -PREDEFINED = +PREDEFINED = HAVE_MPI=1 \ + HAVE_CUDA=1 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The diff --git a/Documentation/Tutorials/Arrays/ArrayAllocation.cpp b/Documentation/Tutorials/Arrays/ArrayAllocation.cpp new file mode 100644 index 000000000..7b6439fe8 --- /dev/null +++ b/Documentation/Tutorials/Arrays/ArrayAllocation.cpp @@ -0,0 +1,46 @@ +#include +#include +#include +#include + +using namespace TNL; +using namespace TNL::Containers; + +int main( int argc, char* argv[] ) +{ + /**** + * Create one array on host and one array on device. + */ + Array< int > host_array( 10 ); + Array< int, Devices::Cuda > device_array; + + /*** + * Initiate the host array with number three and assign it to the device one. + */ + host_array = 3; + device_array = host_array; + + /**** + * Print both arrays. + */ + std::cout << "host_array = " << host_array << std::endl; + std::cout << "device_array = " << device_array << std::endl; + std::cout << std::endl; + + /**** + * And a few other ways to initialize arrays... + */ + std::list< int > list { 1, 2, 3, 4, 5 }; + std::vector< int > vector { 6, 7, 8, 9, 10 }; + + Array< int, Devices::Cuda > device_array_list( list ); + Array< int, Devices::Cuda > device_array_vector( vector ); + Array< int, Devices::Cuda > device_array_init_list { 11, 12, 13, 14, 15 }; + + /**** + * ...
and print them all + */ + std::cout << "device_array_list = " << device_array_list << std::endl; + std::cout << "device_array_vector = " << device_array_vector << std::endl; + std::cout << "device_array_init_list = " << device_array_init_list << std::endl; +} diff --git a/Documentation/Tutorials/Arrays/ArrayAllocation.cu b/Documentation/Tutorials/Arrays/ArrayAllocation.cu new file mode 120000 index 000000000..3d269df60 --- /dev/null +++ b/Documentation/Tutorials/Arrays/ArrayAllocation.cu @@ -0,0 +1 @@ +ArrayAllocation.cpp \ No newline at end of file diff --git a/Documentation/Tutorials/Arrays/ArrayBinding-1.cpp b/Documentation/Tutorials/Arrays/ArrayBinding-1.cpp new file mode 100644 index 000000000..5f22783e1 --- /dev/null +++ b/Documentation/Tutorials/Arrays/ArrayBinding-1.cpp @@ -0,0 +1,32 @@ +#include +#include + +using namespace TNL; +using namespace TNL::Containers; + +int main( int argc, char* argv[] ) +{ + /**** + * Allocate an array on host + */ + const int size = 10; + int* ai = new int[ size ]; + + /**** + * Bind the data with TNL array + */ + Array< int > host_array; + host_array.bind( ai, size ); + + /**** + * Initialize the data using the TNL array + */ + host_array = 66; + + /**** + * Check the data + */ + for( int i = 0; i < size; i++ ) + std::cout << ai[ i ] << " "; + std::cout << std::endl; +} diff --git a/Documentation/Tutorials/Arrays/ArrayBinding-1.cu b/Documentation/Tutorials/Arrays/ArrayBinding-1.cu new file mode 120000 index 000000000..c59236745 --- /dev/null +++ b/Documentation/Tutorials/Arrays/ArrayBinding-1.cu @@ -0,0 +1 @@ +ArrayBinding-1.cpp \ No newline at end of file diff --git a/Documentation/Tutorials/Arrays/ArrayBinding-2.cpp b/Documentation/Tutorials/Arrays/ArrayBinding-2.cpp new file mode 100644 index 000000000..8200d66e2 --- /dev/null +++ b/Documentation/Tutorials/Arrays/ArrayBinding-2.cpp @@ -0,0 +1,44 @@ +#include +#include + +using namespace TNL; +using namespace TNL::Containers; + +void initArray( Array< int >& a ) +{ + /**** + *
Create new array, bind it with 'a' and initialize it + */ + Array< int > b( 10 ); + a.bind( b ); + b = 10; + + /**** + * Show that both arrays share the same data + */ + std::cout << "a data in initArray function is " << a.getData() << std::endl; + std::cout << "a value in initArray function is " << a << std::endl; + std::cout << "--------------------------------------" << std::endl; + std::cout << "b data in initArray function is " << b.getData() << std::endl; + std::cout << "b in initArray function is " << b << std::endl; + std::cout << "--------------------------------------" << std::endl; +} + +int main( int argc, char* argv[] ) +{ + /**** + * Create array but do not initialize it + */ + Array< int > a; + + /*** + * Call function initArray for the array initialization + */ + initArray( a ); + + /**** + * Print the initialized array + */ + std::cout << "a data in initArray function is " << a.getData() << std::endl; + std::cout << "a in main function is " << a << std::endl; +} diff --git a/Documentation/Tutorials/Arrays/ArrayBinding-2.cu b/Documentation/Tutorials/Arrays/ArrayBinding-2.cu new file mode 120000 index 000000000..e03207089 --- /dev/null +++ b/Documentation/Tutorials/Arrays/ArrayBinding-2.cu @@ -0,0 +1 @@ +ArrayBinding-2.cpp \ No newline at end of file diff --git a/Documentation/Tutorials/Arrays/ArrayBinding-3.cpp b/Documentation/Tutorials/Arrays/ArrayBinding-3.cpp new file mode 100644 index 000000000..28f809407 --- /dev/null +++ b/Documentation/Tutorials/Arrays/ArrayBinding-3.cpp @@ -0,0 +1,33 @@ +#include +#include + +using namespace TNL; +using namespace TNL::Containers; + +int main( int argc, char* argv[] ) +{ + /**** + * Allocate data for all degrees of freedom + */ + const int size = 5; + Array< float > a( 3 * size ); + + /*** + * Partition the data into density and velocity components + */ + Array< float > rho( a, 0, size ); + Array< float > v_1( a, size, size ); + Array< float > v_2( a, 2 * size, size ); + + rho = 10.0; + v_1 = 1.0; + v_2 = 
0.0; + + /**** + * Print the initialized arrays + */ + std::cout << "rho = " << rho << std::endl; + std::cout << "v1 = " << v_1 << std::endl; + std::cout << "v2 = " << v_2 << std::endl; + std::cout << "a = " << a << std::endl; +} diff --git a/Documentation/Tutorials/Arrays/ArrayBinding-3.cu b/Documentation/Tutorials/Arrays/ArrayBinding-3.cu new file mode 120000 index 000000000..09b9a603a --- /dev/null +++ b/Documentation/Tutorials/Arrays/ArrayBinding-3.cu @@ -0,0 +1 @@ +ArrayBinding-3.cpp \ No newline at end of file diff --git a/Documentation/Tutorials/Arrays/CMakeLists.txt b/Documentation/Tutorials/Arrays/CMakeLists.txt new file mode 100644 index 000000000..9bf6bcbd1 --- /dev/null +++ b/Documentation/Tutorials/Arrays/CMakeLists.txt @@ -0,0 +1,34 @@ +IF( BUILD_CUDA ) + CUDA_ADD_EXECUTABLE( ArrayAllocation ArrayAllocation.cu ) + ADD_CUSTOM_COMMAND( COMMAND ArrayAllocation > ArrayAllocation.out OUTPUT ArrayAllocation.out ) + CUDA_ADD_EXECUTABLE( ArrayBinding-1 ArrayBinding-1.cu ) + ADD_CUSTOM_COMMAND( COMMAND ArrayBinding-1 > ArrayBinding-1.out OUTPUT ArrayBinding-1.out ) + CUDA_ADD_EXECUTABLE( ArrayBinding-2 ArrayBinding-2.cu ) + ADD_CUSTOM_COMMAND( COMMAND ArrayBinding-2 > ArrayBinding-2.out OUTPUT ArrayBinding-2.out ) + CUDA_ADD_EXECUTABLE( ArrayBinding-3 ArrayBinding-3.cu ) + ADD_CUSTOM_COMMAND( COMMAND ArrayBinding-3 > ArrayBinding-3.out OUTPUT ArrayBinding-3.out ) +ENDIF() + +IF( BUILD_CUDA ) +ADD_CUSTOM_TARGET( TutorialsArrays-cuda ALL DEPENDS + ArrayAllocation.out + ArrayBinding-1.out + ArrayBinding-2.out + ArrayBinding-3.out ) +ENDIF() + +# set input and output files +set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in) +set(DOXYGEN_OUT ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile) + +# request to configure the file +configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY) + +# note the option ALL which allows to build the docs together with the application +add_custom_target( doc_doxygen_tutorial_1 ALL + COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_OUT} + WORKING_DIRECTORY 
${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Generating API documentation with Doxygen" + VERBATIM ) + +INSTALL( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html/ DESTINATION ${CMAKE_INSTALL_PREFIX}/share/doc/tnl/html/Tutorials/Arrays ) \ No newline at end of file diff --git a/Documentation/Doxyfile b/Documentation/Tutorials/Arrays/Doxyfile.in similarity index 99% rename from Documentation/Doxyfile rename to Documentation/Tutorials/Arrays/Doxyfile.in index e603b7dc0..cf4f54ee2 100644 --- a/Documentation/Doxyfile +++ b/Documentation/Tutorials/Arrays/Doxyfile.in @@ -58,7 +58,7 @@ PROJECT_LOGO = # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. -OUTPUT_DIRECTORY = +OUTPUT_DIRECTORY = @CMAKE_CURRENT_BINARY_DIR@ # If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and @@ -790,8 +790,8 @@ WARN_LOGFILE = # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. -INPUT = main-page.md -INPUT += ../src/TNL +INPUT = @CMAKE_CURRENT_SOURCE_DIR@/main-page.md +INPUT += @CMAKE_CURRENT_SOURCE_DIR@/../src/TNL # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses @@ -914,8 +914,8 @@ EXCLUDE_SYMBOLS += TNL::Assert::* # internal namespace # that contain example code fragments that are included (see the \include # command). -EXAMPLE_PATH = ../src/Examples \ - Outputs +EXAMPLE_PATH = @CMAKE_CURRENT_SOURCE_DIR@ \ + @CMAKE_CURRENT_BINARY_DIR@ # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and @@ -1211,7 +1211,7 @@ HTML_EXTRA_STYLESHEET = # files will be copied as-is; there are no commands or markers available. 
# This tag requires that the tag GENERATE_HTML is set to YES. -HTML_EXTRA_FILES = tnl-logo.jpg +HTML_EXTRA_FILES = @PROJECT_SOURCE_DIR@/Documentation/tnl-logo.jpg # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to diff --git a/Documentation/Tutorials/Arrays/main-page.md b/Documentation/Tutorials/Arrays/main-page.md new file mode 100644 index 000000000..95cee092b --- /dev/null +++ b/Documentation/Tutorials/Arrays/main-page.md @@ -0,0 +1,53 @@ +# Arrays tutorial + +## Introduction + +This tutorial introduces arrays and vectors in TNL. Array is one of the most important structure for memory management. Vector, in addition, offers also basic operations from linear algebra. Methods implemented in arrays and vectors are particularly usefull for GPU programming. From this point of view, the reader will learn how to easily allocate memory on GPU, transfer data between GPU and CPU but also, how to initialise data allocated on GPU and perform parallel reduction and vector operations without writting low-level CUDA kernels. In addition, the resulting code is hardware platform independent, so it can be ran on CPU without any changes. + +## Arrays + +Array is templated class define in namespace ```TNL::Containers``` having three template parameters: + +* ```Value``` is type of data to be stored in the array +* ```Device``` is the device wheer the array is allocated. Currently it can be either ```Devices::Host``` for CPU or ```Devices::Cuda``` for GPU supporting CUDA. +* ```Index``` is the type to be used for indexing the array elements. + +The following example shows how to allocate arrays on CPU and GPU and how to manipulate the data. + +\include ArrayAllocation.cpp + +The result looks as follows: + +\include ArrayAllocation.out + + +## Arrays binding + +Arrays can share data with each other or data allocated elsewhere. 
It is called binding and it can be done using method ```bind```. The following example shows how to bind data allocated on host using the ```new``` operator. In this case, the TNL array does not free this data at the end of its life cycle. + +\include ArrayBinding-1.cpp + +It generates output like this: + +\include ArrayBinding-1.out + +One may also bind another TNL array. In this case, the data is shared and can be shared between multiple arrays. Reference counter ensures that the data is freed after the last array sharing the data ends its life cycle. + +\include ArrayBinding-2.cpp + +The result is: + +\include ArrayBinding-2.out + +Binding may also serve for data partitioning. Both CPU and GPU prefer data allocated in large contiguous blocks instead of many fragmented pieces of allocated memory. Another reason why one might want to partition the allocated data is demonstrated in the following example. Consider a situation of solving incompressible flow in 2D. The degrees of freedom consist of density and two components of velocity. Mostly, we want to manipulate either density or velocity. But some numerical solvers may need to have all degrees of freedom in one array. It can be managed like this: + +\include ArrayBinding-3.cpp + +The result is: + +\include ArrayBinding-3.out + + +## Array views + +Because of the data sharing, TNL Array is relatively complicated structure. In many situations, we prefer lightweight structure which only encapsulates the data pointer and keeps information about the data size. Passing array structure to GPU kernel can be one example. For this purpose there is ```ArrayView``` in TNL. It templated structure having the same template parameters as ```Array``` (it means ```Value```, ```Device``` and ```Index```).
In fact, it is recommended to use ```Array``` only for the data allocation and to use ```ArrayView``` for most of the operations with the data since array view offer better functionality (for example ```ArrayView``` can be captured by lambda functions in CUDA while ```Array``` cannot). diff --git a/Documentation/Tutorials/CMakeLists.txt b/Documentation/Tutorials/CMakeLists.txt index df1a85a0f..fae1f7a51 100644 --- a/Documentation/Tutorials/CMakeLists.txt +++ b/Documentation/Tutorials/CMakeLists.txt @@ -1,16 +1 @@ -# set input and output files -set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in) -set(DOXYGEN_OUT ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile) - -# request to configure the file -configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY) - -# note the option ALL which allows to build the docs together with the application -add_custom_target( doc_doxygen_tutorial_1 ALL - COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_OUT} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - COMMENT "Generating API documentation with Doxygen" - VERBATIM ) - -INSTALL( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html DESTINATION ${CMAKE_INSTALL_PREFIX}/share/doc/tnl/tutorials ) - +add_subdirectory( Arrays ) \ No newline at end of file diff --git a/Documentation/Tutorials/main-page.md b/Documentation/Tutorials/main-page.md deleted file mode 100644 index e63165799..000000000 --- a/Documentation/Tutorials/main-page.md +++ /dev/null @@ -1 +0,0 @@ -# The first tutorial \ No newline at end of file diff --git a/Documentation/Tutorials/tutorial-1.cpp b/Documentation/Tutorials/tutorial-1.cpp deleted file mode 100644 index 6f1555a86..000000000 --- a/Documentation/Tutorials/tutorial-1.cpp +++ /dev/null @@ -1,17 +0,0 @@ -/**** - * This tutorial explains work with arrays and vectors. - * - * One first need to include necessary header files. - */ - -#include - -//! 
Main class -class m{}; - -int main( int argc, char* argv[] ) -{ - using namespace TNL; - - -} \ No newline at end of file diff --git a/Documentation/main-page.md b/Documentation/main-page.md index 324483784..501f3b59a 100644 --- a/Documentation/main-page.md +++ b/Documentation/main-page.md @@ -109,6 +109,4 @@ for details. ## Tutorials -> TODO - -1. [Vectors](Tutorials/main-page.md) +1. [Arrays](Tutorials/Arrays/index.html) diff --git a/build b/build index 989e63ad7..cf226271a 100755 --- a/build +++ b/build @@ -24,6 +24,7 @@ WITH_GMP="no" WITH_TESTS="yes" WITH_PROFILING="no" WITH_COVERAGE="no" +WITH_DOC="yes" WITH_EXAMPLES="yes" WITH_PYTHON="yes" WITH_TOOLS="yes" @@ -61,6 +62,7 @@ do --with-tests=* ) WITH_TESTS="${option#*=}" ;; --with-profiling=* ) WITH_PROFILING="${option#*=}" ;; --with-coverage=* ) WITH_COVERAGE="${option#*=}" ;; + --with-doc=* ) WITH_DOC="${option#*=}" ;; --with-examples=* ) WITH_EXAMPLES="${option#*=}" ;; --with-tools=* ) WITH_TOOLS="${option#*=}" ;; --with-benchmarks=* ) WITH_BENCHMARKS="${option#*=}" ;; @@ -101,6 +103,7 @@ if [[ ${HELP} == "yes" ]]; then echo " --with-tests=yes/no Enables unit tests. 'yes' by default." echo " --with-profiling=yes/no Enables code profiling compiler falgs. 'no' by default." echo " --with-coverage=yes/no Enables code coverage reports for unit tests. 'no' by default (lcov is required)." + echo " --with-doc=yes/no Build documentation. 'yes' by default." echo " --with-examples=yes/no Compile the 'examples' directory. 'yes' by default." echo " --with-tools=yes/no Compile the 'src/Tools' directory. 'yes' by default." echo " --with-python=yes/no Compile the Python bindings. 'yes' by default." 
@@ -174,6 +177,7 @@ cmake_command=( -DWITH_TESTS=${WITH_TESTS} -DWITH_PROFILING=${WITH_PROFILING} -DWITH_COVERAGE=${WITH_COVERAGE} + -DWITH_DOC=${WITH_DOC} -DWITH_EXAMPLES=${WITH_EXAMPLES} -DWITH_TOOLS=${WITH_TOOLS} -DWITH_PYTHON=${WITH_PYTHON} -- GitLab From 7ecb7bcf47dd06b40c2220afed3bbcfb80e5585b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 10 Jun 2019 22:21:35 +0200 Subject: [PATCH 61/93] Writing the arrays tutorial. --- .../Tutorials/Arrays/ArrayAllocation.cpp | 3 +- .../Tutorials/Arrays/ArrayView-1.cpp | 31 ++++++++++++ Documentation/Tutorials/Arrays/ArrayView-1.cu | 1 + .../Tutorials/Arrays/ArrayView-2.cpp | 26 ++++++++++ Documentation/Tutorials/Arrays/ArrayView-2.cu | 1 + Documentation/Tutorials/Arrays/CMakeLists.txt | 12 ++++- .../Tutorials/Arrays/ElementsAccessing-1.cpp | 48 +++++++++++++++++++ .../Tutorials/Arrays/ElementsAccessing-1.cu | 1 + Documentation/Tutorials/Arrays/main-page.md | 43 ++++++++++++++++- 9 files changed, 163 insertions(+), 3 deletions(-) create mode 100644 Documentation/Tutorials/Arrays/ArrayView-1.cpp create mode 120000 Documentation/Tutorials/Arrays/ArrayView-1.cu create mode 100644 Documentation/Tutorials/Arrays/ArrayView-2.cpp create mode 120000 Documentation/Tutorials/Arrays/ArrayView-2.cu create mode 100644 Documentation/Tutorials/Arrays/ElementsAccessing-1.cpp create mode 120000 Documentation/Tutorials/Arrays/ElementsAccessing-1.cu diff --git a/Documentation/Tutorials/Arrays/ArrayAllocation.cpp b/Documentation/Tutorials/Arrays/ArrayAllocation.cpp index 7b6439fe8..0008fab6f 100644 --- a/Documentation/Tutorials/Arrays/ArrayAllocation.cpp +++ b/Documentation/Tutorials/Arrays/ArrayAllocation.cpp @@ -16,6 +16,7 @@ int main( int argc, char* argv[] ) /*** * Initiate the host array with number three and assign it to the device one. + * NOTE: Of course, you may do directly 'device_array = 3' as well. 
*/ host_array = 3; device_array = host_array; @@ -28,7 +29,7 @@ int main( int argc, char* argv[] ) std::cout << std::endl; /**** - * And few other way how to initialize arrays... + * And few other ways how to initialize arrays... */ std::list< int > list { 1, 2, 3, 4, 5 }; std::vector< int > vector { 6, 7, 8, 9, 10 }; diff --git a/Documentation/Tutorials/Arrays/ArrayView-1.cpp b/Documentation/Tutorials/Arrays/ArrayView-1.cpp new file mode 100644 index 000000000..cd8f7541c --- /dev/null +++ b/Documentation/Tutorials/Arrays/ArrayView-1.cpp @@ -0,0 +1,31 @@ +#include +#include +#include + +using namespace TNL; +using namespace TNL::Containers; + +int main( int argc, char* argv[] ) +{ + /**** + * Create new array + */ + const int size = 5; + Array< float > a( size ); + + /**** + * Bind an array view with it + */ + ArrayView< float > a_view = a.getView(); + auto another_view = a.getView(); + auto const_view = a.getConstView(); + + another_view = -5; + std::cout << " a = " << a << std::endl; + std::cout << " a_view = " << a_view << std::endl; + std::cout << " another_view = " << another_view << std::endl; + std::cout << " const_view = " << const_view << std::endl; + + //const_view = 3; this would not compile +} + diff --git a/Documentation/Tutorials/Arrays/ArrayView-1.cu b/Documentation/Tutorials/Arrays/ArrayView-1.cu new file mode 120000 index 000000000..ad563d95e --- /dev/null +++ b/Documentation/Tutorials/Arrays/ArrayView-1.cu @@ -0,0 +1 @@ +ArrayView-1.cpp \ No newline at end of file diff --git a/Documentation/Tutorials/Arrays/ArrayView-2.cpp b/Documentation/Tutorials/Arrays/ArrayView-2.cpp new file mode 100644 index 000000000..608fb36ce --- /dev/null +++ b/Documentation/Tutorials/Arrays/ArrayView-2.cpp @@ -0,0 +1,26 @@ +#include +#include +#include + +using namespace TNL; +using namespace TNL::Containers; + +int main( int argc, char* argv[] ) +{ + /**** + * Allocate your own data + */ + const int size = 5; + float* a = new float[ size ]; + + /**** + * Wrap the data 
with an array view + */ + ArrayView< float > a_view( a, size ); + a_view = -5; + + std::cout << " a_view = " << a_view << std::endl; + for( int i = 0; i < size; i++ ) + std::cout << a[ i ] << " "; +} + diff --git a/Documentation/Tutorials/Arrays/ArrayView-2.cu b/Documentation/Tutorials/Arrays/ArrayView-2.cu new file mode 120000 index 000000000..87e38a613 --- /dev/null +++ b/Documentation/Tutorials/Arrays/ArrayView-2.cu @@ -0,0 +1 @@ +ArrayView-2.cpp \ No newline at end of file diff --git a/Documentation/Tutorials/Arrays/CMakeLists.txt b/Documentation/Tutorials/Arrays/CMakeLists.txt index 9bf6bcbd1..71545a820 100644 --- a/Documentation/Tutorials/Arrays/CMakeLists.txt +++ b/Documentation/Tutorials/Arrays/CMakeLists.txt @@ -7,6 +7,13 @@ IF( BUILD_CUDA ) ADD_CUSTOM_COMMAND( COMMAND ArrayBinding-2 > ArrayBinding-2.out OUTPUT ArrayBinding-2.out ) CUDA_ADD_EXECUTABLE( ArrayBinding-3 ArrayBinding-3.cu ) ADD_CUSTOM_COMMAND( COMMAND ArrayBinding-3 > ArrayBinding-3.out OUTPUT ArrayBinding-3.out ) + CUDA_ADD_EXECUTABLE( ArrayView-1 ArrayView-1.cu ) + ADD_CUSTOM_COMMAND( COMMAND ArrayView-1 > ArrayView-1.out OUTPUT ArrayView-1.out ) + CUDA_ADD_EXECUTABLE( ArrayView-2 ArrayView-2.cu ) + ADD_CUSTOM_COMMAND( COMMAND ArrayView-2 > ArrayView-2.out OUTPUT ArrayView-2.out ) + CUDA_ADD_EXECUTABLE( ElementsAccessing-1 ElementsAccessing-1.cu ) + ADD_CUSTOM_COMMAND( COMMAND ElementsAccessing-1 > ElementsAccessing-1.out OUTPUT ElementsAccessing-1.out ) + ENDIF() IF( BUILD_CUDA ) @@ -14,7 +21,10 @@ ADD_CUSTOM_TARGET( TutorialsArrays-cuda ALL DEPENDS ArrayAllocation.out ArrayBinding-1.out ArrayBinding-2.out - ArrayBinding-3.out ) + ArrayBinding-3.out + ArrayView-1.out + ArrayView-2.out + ElementsAccessing-1.out ) ENDIF() # set input and output files diff --git a/Documentation/Tutorials/Arrays/ElementsAccessing-1.cpp b/Documentation/Tutorials/Arrays/ElementsAccessing-1.cpp new file mode 100644 index 000000000..4922fbdc1 --- /dev/null +++ 
b/Documentation/Tutorials/Arrays/ElementsAccessing-1.cpp @@ -0,0 +1,48 @@ +#include +#include +#include + +using namespace TNL; +using namespace TNL::Containers; + +__global__ void initKernel( ArrayView< float, Devices::Cuda > view ) +{ + int tid = threadIdx.x; + if( tid < view.getSize() ) + view[ tid ] = -tid; +} + +int main( int argc, char* argv[] ) +{ + /**** + * Create new arrays on both host and device + */ + const int size = 5; + Array< float, Devices::Host > host_array( size ); + Array< float, Devices::Cuda > device_array( size ); + + /**** + * Initiate the host array + */ + for( int i = 0; i < size; i++ ) + host_array[ i ] = i; + + /**** + * Prepare array view for the device array - we will pass it to a CUDA kernel. + * NOTE: Better way is to use ArrayView::evaluate or ParallelFor, this is just + * example. + */ + auto device_view = device_array.getView(); + + /**** + * Call CUDA kernel to initiate the array on the device + */ + initKernel<<< 1, size >>>( device_view ); + + /**** + * Print the results + */ + std::cout << " host_array = " << host_array << std::endl; + std::cout << " device_array = " << device_array << std::endl; +} + diff --git a/Documentation/Tutorials/Arrays/ElementsAccessing-1.cu b/Documentation/Tutorials/Arrays/ElementsAccessing-1.cu new file mode 120000 index 000000000..d23ce3b3d --- /dev/null +++ b/Documentation/Tutorials/Arrays/ElementsAccessing-1.cu @@ -0,0 +1 @@ +ElementsAccessing-1.cpp \ No newline at end of file diff --git a/Documentation/Tutorials/Arrays/main-page.md b/Documentation/Tutorials/Arrays/main-page.md index 95cee092b..fe4c27137 100644 --- a/Documentation/Tutorials/Arrays/main-page.md +++ b/Documentation/Tutorials/Arrays/main-page.md @@ -50,4 +50,45 @@ The result is: ## Array views -Because of the data sharing, TNL Array is relatively complicated structure. In many situations, we prefer lightweight structure which only encapsulates the data pointer and keeps information about the data size. 
Passing array structure to GPU kernel can be one example. For this purpose there is ```ArrayView``` in TNL. It templated structure having the same template parameters as ```Array``` (it means ```Value```, ```Device``` and ```Index```). In fact, it is recommended to use ```Array``` only for the data allocation and to use ```ArrayView``` for most of the operations with the data since array view offer better functionality (for example ```ArrayView``` can be captured by lambda functions in CUDA while ```Array``` cannot). +Because of the data sharing, TNL Array is a relatively complicated structure. In many situations, we prefer a lightweight structure which only encapsulates the data pointer and keeps information about the data size. Passing an array structure to a GPU kernel can be one example. For this purpose there is ```ArrayView``` in TNL. It is a templated structure having the same template parameters as ```Array``` (it means ```Value```, ```Device``` and ```Index```). In fact, it is recommended to use ```Array``` only for the data allocation and to use ```ArrayView``` for most of the operations with the data since array view offers better functionality (for example ```ArrayView``` can be captured by lambda functions in CUDA while ```Array``` cannot). The following code snippet shows how to create an array view. + +\include ArrayView-1.cpp + +Its output is: + +\include ArrayView-1.out + +Of course, one may bind his own data into array view: + +\include ArrayView-2.cpp + +Output: + +\include ArrayView-2.out + +Array view never deallocate the memory managed by it. + +## Accessing the array elements + +There are two ways how to work with the array (or array view) elements - using the indexing operator (```operator[]```) which is more efficient or methods ```setElement``` and ```getElement``` which are more flexible.
+ +### Accessing the array elements with ```operator[]``` + +Indexing operator ```operator[]``` is implemented in both ```Array``` and ```ArrayView``` and it is defined as ```__cuda_callable__```. It means that it can be called even in CUDA kernels if the data is allocated on GPU, i.e. the ```Device``` parameter is ```Devices::Cuda```. This operator returns a reference to given array element and so it is very efficient. However, calling this operator from host for data allocated on the device (or vice versa) leads to segmentation fault (on the host system) or broken state of the device. It means: + +* You may call the ```operator[]``` on the **host** only for data allocated on the **host** (with device ```Devices::Host```). +* You may call the ```operator[]``` on the **device** only for data allocated on the **device** (with device ```Devices::Cuda```). + +The following example shows use of ```operator[]```. + +\include ElementsAccessing-1.cpp + +Output: + +\include ElementsAccessing-1.out + +In general in TNL, each method defined as ```__cuda_callable__``` can be called from the CUDA kernels. The method ```ArrayView::getSize``` is another example. We also would like to point the reader to better ways of arrays initiation for example with method ```ArrayView::evaluate``` or with ```ParallelFor```. + +### Accessing the array element with ```setElement``` and ```getElement``` + + -- GitLab From f9941f924183d5b586048d83bcbc8987f2e78714 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 11 Jun 2019 14:21:57 +0200 Subject: [PATCH 62/93] Writing arrays and vectors tutorial.
--- .../Tutorials/Arrays/ArrayBinding-2.cpp | 2 +- .../Tutorials/Arrays/ArrayViewEvaluate.cpp | 34 + .../Tutorials/Arrays/ArrayViewEvaluate.cu | 1 + Documentation/Tutorials/Arrays/CMakeLists.txt | 14 +- .../Tutorials/Arrays/ContainsValue.cpp | 52 + .../Tutorials/Arrays/ContainsValue.cu | 1 + .../Tutorials/Arrays/ElementsAccessing-2.cpp | 40 + .../Tutorials/Arrays/ElementsAccessing-2.cu | 1 + Documentation/Tutorials/Arrays/main-page.md | 42 +- Documentation/Tutorials/CMakeLists.txt | 3 +- .../Tutorials/Vectors/CMakeLists.txt | 25 + Documentation/Tutorials/Vectors/Doxyfile.in | 2506 +++++++++++++++++ Documentation/Tutorials/Vectors/main-page.md | 14 + src/TNL/Containers/ArrayView.h | 8 +- src/TNL/Containers/ArrayView.hpp | 12 +- 15 files changed, 2741 insertions(+), 14 deletions(-) create mode 100644 Documentation/Tutorials/Arrays/ArrayViewEvaluate.cpp create mode 120000 Documentation/Tutorials/Arrays/ArrayViewEvaluate.cu create mode 100644 Documentation/Tutorials/Arrays/ContainsValue.cpp create mode 120000 Documentation/Tutorials/Arrays/ContainsValue.cu create mode 100644 Documentation/Tutorials/Arrays/ElementsAccessing-2.cpp create mode 120000 Documentation/Tutorials/Arrays/ElementsAccessing-2.cu create mode 100644 Documentation/Tutorials/Vectors/CMakeLists.txt create mode 100644 Documentation/Tutorials/Vectors/Doxyfile.in create mode 100644 Documentation/Tutorials/Vectors/main-page.md diff --git a/Documentation/Tutorials/Arrays/ArrayBinding-2.cpp b/Documentation/Tutorials/Arrays/ArrayBinding-2.cpp index 8200d66e2..a52e873f0 100644 --- a/Documentation/Tutorials/Arrays/ArrayBinding-2.cpp +++ b/Documentation/Tutorials/Arrays/ArrayBinding-2.cpp @@ -39,6 +39,6 @@ int main( int argc, char* argv[] ) /**** * Print the initialized array */ - std::cout << "a data in initArray function is " << a.getData() << std::endl; + std::cout << "a data in main function is " << a.getData() << std::endl; std::cout << "a in main function is " << a << std::endl; } diff --git 
a/Documentation/Tutorials/Arrays/ArrayViewEvaluate.cpp b/Documentation/Tutorials/Arrays/ArrayViewEvaluate.cpp new file mode 100644 index 000000000..2bbf89a4d --- /dev/null +++ b/Documentation/Tutorials/Arrays/ArrayViewEvaluate.cpp @@ -0,0 +1,34 @@ +#include +#include +#include + +using namespace TNL; +using namespace TNL::Containers; + +int main( int argc, char* argv[] ) +{ + /**** + * Create new arrays + */ + const int size = 10; + Array< float, Devices::Cuda > a( size ), b( size ); + b = 0; + + /**** + * Create an ArrayView and use it for initiation + */ + auto a_view = a.getView(); + a_view.evaluate( [] __cuda_callable__ ( int i ) -> float { return i; } ); + + /**** + * Initiate elements of b with indexes 0-4 using a_view + */ + b.getView().evaluate( [=] __cuda_callable__ ( int i ) -> float { return a_view[ i ] + 4.0; }, 0, 5 ); + + /**** + * Print the results + */ + std::cout << " a = " << a << std::endl; + std::cout << " b = " << b << std::endl; +} + diff --git a/Documentation/Tutorials/Arrays/ArrayViewEvaluate.cu b/Documentation/Tutorials/Arrays/ArrayViewEvaluate.cu new file mode 120000 index 000000000..c457e9413 --- /dev/null +++ b/Documentation/Tutorials/Arrays/ArrayViewEvaluate.cu @@ -0,0 +1 @@ +ArrayViewEvaluate.cpp \ No newline at end of file diff --git a/Documentation/Tutorials/Arrays/CMakeLists.txt b/Documentation/Tutorials/Arrays/CMakeLists.txt index 71545a820..9e8d0864f 100644 --- a/Documentation/Tutorials/Arrays/CMakeLists.txt +++ b/Documentation/Tutorials/Arrays/CMakeLists.txt @@ -11,9 +11,14 @@ IF( BUILD_CUDA ) ADD_CUSTOM_COMMAND( COMMAND ArrayView-1 > ArrayView-1.out OUTPUT ArrayView-1.out ) CUDA_ADD_EXECUTABLE( ArrayView-2 ArrayView-2.cu ) ADD_CUSTOM_COMMAND( COMMAND ArrayView-2 > ArrayView-2.out OUTPUT ArrayView-2.out ) + CUDA_ADD_EXECUTABLE( ArrayViewEvaluate ArrayViewEvaluate.cu ) + ADD_CUSTOM_COMMAND( COMMAND ArrayViewEvaluate > ArrayViewEvaluate.out OUTPUT ArrayViewEvaluate.out ) + CUDA_ADD_EXECUTABLE( ContainsValue ContainsValue.cu ) + 
ADD_CUSTOM_COMMAND( COMMAND ContainsValue > ContainsValue.out OUTPUT ContainsValue.out ) CUDA_ADD_EXECUTABLE( ElementsAccessing-1 ElementsAccessing-1.cu ) ADD_CUSTOM_COMMAND( COMMAND ElementsAccessing-1 > ElementsAccessing-1.out OUTPUT ElementsAccessing-1.out ) - + CUDA_ADD_EXECUTABLE( ElementsAccessing-2 ElementsAccessing-2.cu ) + ADD_CUSTOM_COMMAND( COMMAND ElementsAccessing-2 > ElementsAccessing-2.out OUTPUT ElementsAccessing-2.out ) ENDIF() IF( BUILD_CUDA ) @@ -24,7 +29,10 @@ ADD_CUSTOM_TARGET( TutorialsArrays-cuda ALL DEPENDS ArrayBinding-3.out ArrayView-1.out ArrayView-2.out - ElementsAccessing-1.out ) + ContainsValue.out + ElementsAccessing-1.out + ElementsAccessing-2.out + ArrayViewEvaluate.out ) ENDIF() # set input and output files @@ -35,7 +43,7 @@ set(DOXYGEN_OUT ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile) configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY) # note the option ALL which allows to build the docs together with the application -add_custom_target( doc_doxygen_tutorial_1 ALL +add_custom_target( doc_doxygen_tutorial_arrays ALL COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_OUT} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} COMMENT "Generating API documentation with Doxygen" diff --git a/Documentation/Tutorials/Arrays/ContainsValue.cpp b/Documentation/Tutorials/Arrays/ContainsValue.cpp new file mode 100644 index 000000000..1b1f30e4d --- /dev/null +++ b/Documentation/Tutorials/Arrays/ContainsValue.cpp @@ -0,0 +1,52 @@ +#include +#include +#include + +using namespace TNL; +using namespace TNL::Containers; + +int main( int argc, char* argv[] ) +{ + /**** + * Create new arrays and initiate them + */ + const int size = 10; + Array< float, Devices::Cuda > a( size ), b( size ); + a = 0; + b.getView().evaluate( [=] __cuda_callable__ ( int i ) -> float { return i; } ); + + /**** + * Test the values store in the arrays + */ + if( a.containsValue( 0.0 ) ) + std::cout << "a contains 0" << std::endl; + + if( a.containsValue( 1.0 ) ) + std::cout << "a contains 1" << 
std::endl; + + if( b.containsValue( 0.0 ) ) + std::cout << "b contains 0" << std::endl; + + if( b.containsValue( 1.0 ) ) + std::cout << "b contains 1" << std::endl; + + if( a.containsOnlyValue( 0.0 ) ) + std::cout << "a contains only 0" << std::endl; + + if( a.containsOnlyValue( 1.0 ) ) + std::cout << "a contains only 1" << std::endl; + + if( b.containsOnlyValue( 0.0 ) ) + std::cout << "b contains only 0" << std::endl; + + if( b.containsOnlyValue( 1.0 ) ) + std::cout << "b contains only 1" << std::endl; + + /**** + * Change the first half of b and test it again + */ + b.getView().evaluate( [=] __cuda_callable__ ( int i ) -> float { return 0.0; }, 0, 5 ); + if( b.containsOnlyValue( 0.0, 0, 5 ) ) + std::cout << "First five elements of b contains only 0" << std::endl; +} + diff --git a/Documentation/Tutorials/Arrays/ContainsValue.cu b/Documentation/Tutorials/Arrays/ContainsValue.cu new file mode 120000 index 000000000..015d07af1 --- /dev/null +++ b/Documentation/Tutorials/Arrays/ContainsValue.cu @@ -0,0 +1 @@ +ContainsValue.cpp \ No newline at end of file diff --git a/Documentation/Tutorials/Arrays/ElementsAccessing-2.cpp b/Documentation/Tutorials/Arrays/ElementsAccessing-2.cpp new file mode 100644 index 000000000..b468e41aa --- /dev/null +++ b/Documentation/Tutorials/Arrays/ElementsAccessing-2.cpp @@ -0,0 +1,40 @@ +#include +#include +#include + +using namespace TNL; +using namespace TNL::Containers; + +int main( int argc, char* argv[] ) +{ + /**** + * Create new arrays on both host and device + */ + const int size = 5; + Array< float, Devices::Host > host_array( size ); + Array< float, Devices::Cuda > device_array( size ); + + /**** + * Initiate the arrays with setElement + */ + for( int i = 0; i < size; i++ ) + { + host_array.setElement( i, i ); + device_array.setElement( i, i ); + } + + /**** + * Compare the arrays using getElement + */ + for( int i = 0; i < size; i++ ) + if( host_array.getElement( i ) == device_array.getElement( i ) ) + std::cout << "Elements at 
position " << i << " match." << std::endl; + + /**** + * Print the results + */ + std::cout << std::endl; + std::cout << "host_array = " << host_array << std::endl; + std::cout << "device_array = " << device_array << std::endl; +} + diff --git a/Documentation/Tutorials/Arrays/ElementsAccessing-2.cu b/Documentation/Tutorials/Arrays/ElementsAccessing-2.cu new file mode 120000 index 000000000..8b789d324 --- /dev/null +++ b/Documentation/Tutorials/Arrays/ElementsAccessing-2.cu @@ -0,0 +1 @@ +ElementsAccessing-2.cpp \ No newline at end of file diff --git a/Documentation/Tutorials/Arrays/main-page.md b/Documentation/Tutorials/Arrays/main-page.md index fe4c27137..a410441ba 100644 --- a/Documentation/Tutorials/Arrays/main-page.md +++ b/Documentation/Tutorials/Arrays/main-page.md @@ -2,11 +2,11 @@ ## Introduction -This tutorial introduces arrays and vectors in TNL. Array is one of the most important structure for memory management. Vector, in addition, offers also basic operations from linear algebra. Methods implemented in arrays and vectors are particularly usefull for GPU programming. From this point of view, the reader will learn how to easily allocate memory on GPU, transfer data between GPU and CPU but also, how to initialise data allocated on GPU and perform parallel reduction and vector operations without writting low-level CUDA kernels. In addition, the resulting code is hardware platform independent, so it can be ran on CPU without any changes. +This tutorial introduces arrays in TNL. Array is one of the most important structure for memory management. Methods implemented in arrays are particularly usefull for GPU programming. From this point of view, the reader will learn how to easily allocate memory on GPU, transfer data between GPU and CPU but also, how to initialise data allocated on GPU. In addition, the resulting code is hardware platform independent, so it can be ran on CPU without any changes. 
## Arrays -Array is templated class define in namespace ```TNL::Containers``` having three template parameters: +Array is templated class defined in namespace ```TNL::Containers``` having three template parameters: * ```Value``` is type of data to be stored in the array * ```Device``` is the device wheer the array is allocated. Currently it can be either ```Devices::Host``` for CPU or ```Devices::Cuda``` for GPU supporting CUDA. @@ -66,7 +66,7 @@ Output: \include ArrayView-2.out -Array view never deallocate the memory managed by it. +Array view never allocates or deallocates the memory managed by it. Therefore it can be created even in CUDA kernels which is not true for ```Array```. ## Accessing the array elements @@ -91,4 +91,40 @@ In general in TNL, each method defined as ```__cuda_callable__``` can be called ### Accessing the array element with ```setElement``` and ```getElement``` +On the other hand, the methods ```setElement``` and ```getElement``` can be called **from the host only** no matter where the array is allocated. None of the methods can be used in CUDA kernels. ```getElement``` returns a copy of an element rather than a reference. Therefore it is slightly slower. If the array is on GPU, the array element is copied from the device to the host (or vice versa) which is significantly slower. In those parts of code where the performance matters, these methods shall not be called. Their use is, however, much easier and they allow writing one simple code for both CPU and GPU.
Both methods are good candidates for: + +* reading/writing of only a few elements in the array +* arrays initiation which is done only once and is not a time-critical part of the code +* debugging purposes + +The following example shows the use of ```getElement``` and ```setElement```: + +\include ElementsAccessing-2.cpp + +Output: + +\include ElementsAccessing-2.out + +## Arrays initiation with lambdas + +A more efficient and still quite simple method for the arrays initiation is with the use of C++ lambda functions and method ```evaluate```. This method is implemented in ```ArrayView``` only. As an argument a lambda function is passed which is then evaluated for all elements. Optionally one may define only a subinterval of element indexes where the lambda shall be evaluated. If the underlying array is allocated on GPU, the lambda function is called from a CUDA kernel. This is why it is more efficient than the use of ```setElement```. On the other hand, one must be careful to use only ```__cuda_callable__``` methods inside the lambda. The following example demonstrates the use of the method ```evaluate```. + +\include ArrayViewEvaluate.cpp + +Output: + +\include ArrayViewEvaluate.out + +## Checking the array contents + +Methods ```containsValue``` and ```containsOnlyValue``` serve for testing the contents of the arrays. ```containsValue``` returns ```true``` if there is at least one element in the array with the given value. ```containsOnlyValue``` returns ```true``` only if all elements of the array equal the given value. The test can be restricted to a subinterval of array elements. Both methods are implemented in ```Array``` as well as in ```ArrayView```. See the following code snippet for an example of use.
+ +\include ContainsValue.cpp + +Output: + +\include ContainsValue.out + +## IO operations with Arrays + diff --git a/Documentation/Tutorials/CMakeLists.txt b/Documentation/Tutorials/CMakeLists.txt index fae1f7a51..15e5b3623 100644 --- a/Documentation/Tutorials/CMakeLists.txt +++ b/Documentation/Tutorials/CMakeLists.txt @@ -1 +1,2 @@ -add_subdirectory( Arrays ) \ No newline at end of file +add_subdirectory( Arrays ) +add_subdirectory( Vectors ) \ No newline at end of file diff --git a/Documentation/Tutorials/Vectors/CMakeLists.txt b/Documentation/Tutorials/Vectors/CMakeLists.txt new file mode 100644 index 000000000..ab3fedc30 --- /dev/null +++ b/Documentation/Tutorials/Vectors/CMakeLists.txt @@ -0,0 +1,25 @@ +#IF( BUILD_CUDA ) +# CUDA_ADD_EXECUTABLE( ArrayAllocation ArrayAllocation.cu ) +# ADD_CUSTOM_COMMAND( COMMAND ArrayAllocation > ArrayAllocation.out OUTPUT ArrayAllocation.out ) +#ENDIF() + +#IF( BUILD_CUDA ) +#ADD_CUSTOM_TARGET( TutorialsVectors-cuda ALL DEPENDS +# ArrayViewEvaluate.out ) +#ENDIF() + +# set input and output files +set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in) +set(DOXYGEN_OUT ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile) + +# request to configure the file +configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY) + +# note the option ALL which allows to build the docs together with the application +add_custom_target( doc_doxygen_tutorial_vectors ALL + COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_OUT} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Generating API documentation with Doxygen" + VERBATIM ) + +INSTALL( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html/ DESTINATION ${CMAKE_INSTALL_PREFIX}/share/doc/tnl/html/Tutorials/Arrays ) \ No newline at end of file diff --git a/Documentation/Tutorials/Vectors/Doxyfile.in b/Documentation/Tutorials/Vectors/Doxyfile.in new file mode 100644 index 000000000..cf4f54ee2 --- /dev/null +++ b/Documentation/Tutorials/Vectors/Doxyfile.in @@ -0,0 +1,2506 @@ +# Doxyfile 1.8.13 + +# This file describes the settings 
to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (\" \"). + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all text +# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv +# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv +# for the list of possible encodings. +# The default value is: UTF-8. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places. +# The default value is: My Project. + +PROJECT_NAME = "Template Numerical Library" + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. + +PROJECT_NUMBER = 0.1 + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer a +# quick idea about the purpose of the project. Keep the description short. 
+ +PROJECT_BRIEF = + +# With the PROJECT_LOGO tag one can specify a logo or an icon that is included +# in the documentation. The maximum height of the logo should not exceed 55 +# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy +# the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. If a relative path is +# entered, it will be relative to the location where doxygen was started. If +# left blank the current directory will be used. + +OUTPUT_DIRECTORY = @CMAKE_CURRENT_BINARY_DIR@ + +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- +# directories (in 2 levels) under the output directory of each output format and +# will distribute the generated files over these directories. Enabling this +# option can be useful when feeding doxygen a huge amount of source files, where +# putting all generated files in the same directory would otherwise causes +# performance problems for the file system. +# The default value is: NO. + +CREATE_SUBDIRS = NO + +# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII +# characters to appear in the names of generated files. If set to NO, non-ASCII +# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode +# U+3044. +# The default value is: NO. + +ALLOW_UNICODE_NAMES = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. 
+# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, +# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), +# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, +# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), +# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, +# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, +# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, +# Ukrainian and Vietnamese. +# The default value is: English. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member +# descriptions after the members that are listed in the file and class +# documentation (similar to Javadoc). Set to NO to disable this. +# The default value is: YES. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief +# description of a member or function before the detailed description +# +# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. +# The default value is: YES. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator that is +# used to form the text in various listings. Each string in this list, if found +# as the leading text of the brief description, will be stripped from the text +# and the result, after processing the whole list, is used as the annotated +# text. Otherwise, the brief description is used as-is. If left blank, the +# following values are used ($name is automatically replaced with the name of +# the entity):The $name class, The $name widget, The $name file, is, provides, +# specifies, contains, represents, a, an and the. 
+ +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# doxygen will generate a detailed section even if there is only a brief +# description. +# The default value is: NO. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +# The default value is: NO. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path +# before files name in the file list and in the header files. If set to NO the +# shortest path that makes the file name unique will be used +# The default value is: YES. + +FULL_PATH_NAMES = YES + +# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. +# Stripping is only done if one of the specified strings matches the left-hand +# part of the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the path to +# strip. +# +# Note that you can specify absolute paths here, but also relative paths, which +# will be relative from the directory where doxygen is started. +# This tag requires that the tag FULL_PATH_NAMES is set to YES. + +STRIP_FROM_PATH = ../ + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the +# path mentioned in the documentation of a class, which tells the reader which +# header file to include in order to use a class. If left blank only the name of +# the header file containing the class definition is used. 
Otherwise one should +# specify the list of include paths that are normally passed to the compiler +# using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but +# less readable) file names. This can be useful is your file systems doesn't +# support long names like on DOS, Mac, or CD-ROM. +# The default value is: NO. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO. + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.) +# The default value is: NO. + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a +# multi-line C++ special comment block (i.e. a block of //! or /// comments) as +# a brief description. This used to be the default behavior. The new default is +# to treat a multi-line C++ comment block as a detailed description. Set this +# tag to YES if you prefer the old behavior instead. +# +# Note that setting this tag to YES also means that rational rose comments are +# not recognized any more. +# The default value is: NO. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the +# documentation from any documented member that it re-implements. +# The default value is: YES. 
+ +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new +# page for each member. If set to NO, the documentation of a member will be part +# of the file/class/namespace that contains it. +# The default value is: NO. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen +# uses this value to replace tabs by spaces in code fragments. +# Minimum value: 1, maximum value: 16, default value: 4. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that act as commands in +# the documentation. An alias has the form: +# name=value +# For example adding +# "sideeffect=@par Side Effects:\n" +# will allow you to put the command \sideeffect (or @sideeffect) in the +# documentation, which will result in a user-defined paragraph with heading +# "Side Effects:". You can put \n's in the value part of an alias to insert +# newlines. + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding "class=itcl::class" +# will allow you to use the command class in the itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. For +# instance, some of the names that are used will be different. The list of all +# members will be omitted, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or +# Python sources only. Doxygen will then generate output that is more tailored +# for that language. For instance, namespaces will be presented as packages, +# qualified scopes will look different, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources. 
Doxygen will then generate output that is tailored for Fortran. +# The default value is: NO. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for VHDL. +# The default value is: NO. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, and +# language is one of the parsers supported by doxygen: IDL, Java, Javascript, +# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: +# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: +# Fortran. In the later case the parser tries to guess whether the code is fixed +# or free formatted code, this is the default for Fortran type files), VHDL. For +# instance to make doxygen treat .inc files as Fortran files (default is PHP), +# and .f files as C (default is Fortran), use: inc=Fortran f=C. +# +# Note: For files without extension you can use no_extension as a placeholder. +# +# Note that for custom extensions you also need to set FILE_PATTERNS otherwise +# the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments +# according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you can +# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in +# case of backward compatibilities issues. +# The default value is: YES. 
+ +MARKDOWN_SUPPORT = YES + +# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up +# to that level are automatically included in the table of contents, even if +# they do not have an id attribute. +# Note: This feature currently applies only to Markdown headings. +# Minimum value: 0, maximum value: 99, default value: 0. +# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. + +TOC_INCLUDE_HEADINGS = 0 + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by putting a % sign in front of the word or +# globally by setting AUTOLINK_SUPPORT to NO. +# The default value is: YES. + +AUTOLINK_SUPPORT = NO + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should set this +# tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); +# versus func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. +# The default value is: NO. + +BUILTIN_STL_SUPPORT = YES + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. +# The default value is: NO. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: +# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen +# will parse them like normal C++ but will assume all classes use public instead +# of private inheritance when no explicit protection keyword is present. +# The default value is: NO. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. 
Setting this option to YES will make +# doxygen to replace the get and set methods by a property in the documentation. +# This will only work if the methods are indeed getting or setting a simple +# type. If this is not the case, or you want to show the methods anyway, you +# should set this option to NO. +# The default value is: YES. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +# The default value is: NO. + +DISTRIBUTE_GROUP_DOC = NO + +# If one adds a struct or class to a group and this option is enabled, then also +# any nested class or struct is added to the same group. By default this option +# is disabled and one has to add nested compounds explicitly via \ingroup. +# The default value is: NO. + +GROUP_NESTED_COMPOUNDS = NO + +# Set the SUBGROUPING tag to YES to allow class member groups of the same type +# (for instance a group of public functions) to be put as a subgroup of that +# type (e.g. under the Public Functions section). Set it to NO to prevent +# subgrouping. Alternatively, this can be done per class using the +# \nosubgrouping command. +# The default value is: YES. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions +# are shown inside the group in which they are included (e.g. using \ingroup) +# instead of on a separate page (for HTML and Man pages) or section (for LaTeX +# and RTF). +# +# Note that this feature does not work in combination with +# SEPARATE_MEMBER_PAGES. +# The default value is: NO. 
+ +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions +# with only public data fields or simple typedef fields will be shown inline in +# the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. If set +# to NO, structs, classes, and unions are shown on a separate page (for HTML and +# Man pages) or section (for LaTeX and RTF). +# The default value is: NO. + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or +# enum is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically be +# useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. +# The default value is: NO. + +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can be +# an expensive process and often the same symbol appears multiple times in the +# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small +# doxygen will become slower. If the cache is too large, memory is wasted. The +# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range +# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 +# symbols. At the end of a run doxygen will report the cache usage and suggest +# the optimal cache size from a speed point of view. +# Minimum value: 0, maximum value: 9, default value: 0. 
+ +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in +# documentation are documented, even if no documentation was available. Private +# class members and static file members will be hidden unless the +# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. +# Note: This will also disable the warnings about undocumented members that are +# normally produced when WARNINGS is set to YES. +# The default value is: NO. + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will +# be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal +# scope will be included in the documentation. +# The default value is: NO. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be +# included in the documentation. +# The default value is: NO. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined +# locally in source files will be included in the documentation. If set to NO, +# only classes defined in header files are included. Does not have any effect +# for Java sources. +# The default value is: YES. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. If set to YES, local methods, +# which are defined in the implementation section but not in the interface are +# included in the documentation. If set to NO, only methods in the interface are +# included. +# The default value is: NO. 
+ +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base name of +# the file that contains the anonymous namespace. By default anonymous namespace +# are hidden. +# The default value is: NO. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all +# undocumented members inside documented classes or files. If set to NO these +# members will be included in the various overviews, but no documentation +# section is generated. This option has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. If set +# to NO, these classes will be included in the various overviews. This option +# has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend +# (class|struct|union) declarations. If set to NO, these declarations will be +# included in the documentation. +# The default value is: NO. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any +# documentation blocks found inside the body of a function. If set to NO, these +# blocks will be appended to the function's detailed documentation block. +# The default value is: NO. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation that is typed after a +# \internal command is included. If the tag is set to NO then the documentation +# will be excluded. Set it to YES to include the internal documentation. +# The default value is: NO. 
+ +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file +# names in lower-case letters. If set to YES, upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. +# The default value is: system dependent. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with +# their full class and namespace scopes in the documentation. If set to YES, the +# scope will be hidden. +# The default value is: NO. + +HIDE_SCOPE_NAMES = NO + +# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will +# append additional text to a page's title, such as Class Reference. If set to +# YES the compound reference will be hidden. +# The default value is: NO. + +HIDE_COMPOUND_REFERENCE= NO + +# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of +# the files that are included by a file in the documentation of that file. +# The default value is: YES. + +SHOW_INCLUDE_FILES = YES + +# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each +# grouped member an include statement to the documentation, telling the reader +# which file to include in order to use the member. +# The default value is: NO. + +SHOW_GROUPED_MEMB_INC = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include +# files with double quotes in the documentation rather than with sharp brackets. +# The default value is: NO. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the +# documentation for inline members. +# The default value is: YES. 
+ +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the +# (detailed) documentation of file and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. +# The default value is: YES. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief +# descriptions of file, namespace and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. Note that +# this will also influence the order of the classes in the class list. +# The default value is: NO. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the +# (brief and detailed) documentation of class members so that constructors and +# destructors are listed first. If set to NO the constructors will appear in the +# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. +# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief +# member documentation. +# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting +# detailed member documentation. +# The default value is: NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy +# of group names into alphabetical order. If set to NO the group names will +# appear in their defined order. +# The default value is: NO. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by +# fully-qualified names, including namespaces. If set to NO, the class list will +# be sorted only by class name, not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the alphabetical +# list. +# The default value is: NO. 
+ +SORT_BY_SCOPE_NAME = YES + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper +# type resolution of all parameters of a function it will reject a match between +# the prototype and the implementation of a member function even if there is +# only one candidate or it is obvious which candidate to choose by doing a +# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still +# accept a match between prototype and implementation in such cases. +# The default value is: NO. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo +# list. This list is created by putting \todo commands in the documentation. +# The default value is: YES. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test +# list. This list is created by putting \test commands in the documentation. +# The default value is: YES. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug +# list. This list is created by putting \bug commands in the documentation. +# The default value is: YES. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) +# the deprecated list. This list is created by putting \deprecated commands in +# the documentation. +# The default value is: YES. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional documentation +# sections, marked by \if <section_label> ... \endif and \cond <section_label> +# ... \endcond blocks. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the +# initial value of a variable or macro / define can have for it to appear in the +# documentation. If the initializer consists of more lines than specified here +# it will be hidden. Use a value of 0 to hide initializers completely.
The +# appearance of the value of individual variables and macros / defines can be +# controlled using \showinitializer or \hideinitializer command in the +# documentation regardless of this setting. +# Minimum value: 0, maximum value: 10000, default value: 30. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at +# the bottom of the documentation of classes and structs. If set to YES, the +# list will mention the files that were used to generate the documentation. +# The default value is: YES. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This +# will remove the Files entry from the Quick Index and from the Folder Tree View +# (if specified). +# The default value is: YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces +# page. This will remove the Namespaces entry from the Quick Index and from the +# Folder Tree View (if specified). +# The default value is: YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command command input-file, where command is the value of the +# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided +# by doxygen. Whatever the program writes to standard output is used as the file +# version. For an example see the documentation. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. 
You can +# optionally specify a file name after the option, if omitted DoxygenLayout.xml +# will be used as the name of the layout file. +# +# Note that if you run doxygen from a directory containing a file called +# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE +# tag is left empty. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files containing +# the reference definitions. This must be a list of .bib files. The .bib +# extension is automatically appended if omitted. This requires the bibtex tool +# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. +# For LaTeX the style of the bibliography can be controlled using +# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the +# search path. See also \cite for info how to create references. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES +# this implies that the warnings are on. +# +# Tip: Turn warnings on while writing the documentation. +# The default value is: YES. + +WARNINGS = YES + +# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. 
+ +WARN_IF_UNDOCUMENTED = YES + +# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some parameters +# in a documented function, or documenting parameters that don't exist or using +# markup commands wrongly. +# The default value is: YES. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that +# are documented, but have no documentation for their parameters or return +# value. If set to NO, doxygen will only warn about wrong or incomplete +# parameter documentation, but not about the absence of documentation. +# The default value is: NO. + +WARN_NO_PARAMDOC = NO + +# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when +# a warning is encountered. +# The default value is: NO. + +WARN_AS_ERROR = NO + +# The WARN_FORMAT tag determines the format of the warning messages that doxygen +# can produce. The string should contain the $file, $line, and $text tags, which +# will be replaced by the file and line number from which the warning originated +# and the warning text. Optionally the format may contain $version, which will +# be replaced by the version of the file (if it could be obtained via +# FILE_VERSION_FILTER) +# The default value is: $file:$line: $text. + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning and error +# messages should be written. If left blank the output is written to standard +# error (stderr). + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag is used to specify the files and/or directories that contain +# documented source files. You may enter file names like myfile.cpp or +# directories like /usr/src/myproject. 
Separate the files or directories with +# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING +# Note: If this tag is empty the current directory is searched. + +INPUT = @CMAKE_CURRENT_SOURCE_DIR@/main-page.md +INPUT += @CMAKE_CURRENT_SOURCE_DIR@/../src/TNL + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses +# libiconv (or the iconv built into libc) for the transcoding. See the libiconv +# documentation (see: http://www.gnu.org/software/libiconv) for the list of +# possible encodings. +# The default value is: UTF-8. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# read by doxygen. +# +# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, +# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, +# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, +# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, +# *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf and *.qsf. + +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.c++ \ + *.java \ + *.ii \ + *.ixx \ + *.ipp \ + *.i++ \ + *.inl \ + *.idl \ + *.ddl \ + *.odl \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.h++ \ + *.cs \ + *.d \ + *.php \ + *.php4 \ + *.php5 \ + *.phtml \ + *.inc \ + *.m \ + *.markdown \ + *.md \ + *.mm \ + *.dox \ + *.py \ + *.pyw \ + *.f90 \ + *.f95 \ + *.f03 \ + *.f08 \ + *.f \ + *.for \ + *.tcl \ + *.vhd \ + *.vhdl \ + *.ucf \ + *.qsf + +# The RECURSIVE tag can be used to specify whether or not subdirectories should +# be searched for input files as well. 
+# The default value is: NO. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. +# The default value is: NO. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories for example use the pattern */test/* + +EXCLUDE_PATTERNS = */Debugging/* \ + */Experimental/* \ + */Meshes/GridDetails/* \ + */Meshes/MeshDetails/* \ + */Problems/* + + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories use the pattern */test/* + +EXCLUDE_SYMBOLS = detail::*,*::detail::* # internal namespaces +EXCLUDE_SYMBOLS += *::__*impl # internal namespaces +EXCLUDE_SYMBOLS += TNL::Assert::* # internal namespace + +# The EXAMPLE_PATH tag can be used to specify one or more files or directories +# that contain example code fragments that are included (see the \include +# command). 
+ +EXAMPLE_PATH = @CMAKE_CURRENT_SOURCE_DIR@ \ + @CMAKE_CURRENT_BINARY_DIR@ + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank all +# files are included. + +EXAMPLE_PATTERNS = * + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude commands +# irrespective of the value of the RECURSIVE tag. +# The default value is: NO. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or directories +# that contain images that are to be included in the documentation (see the +# \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command: +# +# <filter> <input-file> +# +# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the +# name of an input file. Doxygen will then use the output that the filter +# program writes to standard output. If FILTER_PATTERNS is specified, this tag +# will be ignored. +# +# Note that the filter must not add or remove lines; it is applied before the +# code is scanned, but not when the output code is generated. If lines are added +# or removed, the anchors will not be placed correctly. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# properly processed by doxygen. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: pattern=filter +# (like *.cpp=my_cpp_filter).
See INPUT_FILTER for further information on how +# filters are used. If the FILTER_PATTERNS tag is empty or if none of the +# patterns match the file name, INPUT_FILTER is applied. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# properly processed by doxygen. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will also be used to filter the input files that are used for +# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). +# The default value is: NO. + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and +# it is also possible to disable source filtering for a specific pattern using +# *.ext= (so without naming a filter). +# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. + +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html). This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. + +USE_MDFILE_AS_MAINPAGE = main-page.md + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will be +# generated. Documented entities will be cross-referenced with these sources. +# +# Note: To get rid of all source code in the generated output, make sure that +# also VERBATIM_HEADERS is set to NO. +# The default value is: NO. 
+ +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body of functions, +# classes and enums directly into the documentation. +# The default value is: NO. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any +# special comment blocks from generated source code fragments. Normal C, C++ and +# Fortran comments will always remain visible. +# The default value is: YES. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES then for each documented +# function all documented functions referencing it will be listed. +# The default value is: NO. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES then for each documented function +# all documented entities called/used by that function will be listed. +# The default value is: NO. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set +# to YES then the hyperlinks from functions in REFERENCES_RELATION and +# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will +# link to the documentation. +# The default value is: YES. + +REFERENCES_LINK_SOURCE = YES + +# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the +# source code will show a tooltip with additional information such as prototype, +# brief description and links to the definition and documentation. Since this +# will make the HTML file larger and loading of large files a bit slower, you +# can opt to disable this feature. +# The default value is: YES. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +SOURCE_TOOLTIPS = YES + +# If the USE_HTAGS tag is set to YES then the references to source code will +# point to the HTML generated by the htags(1) tool instead of doxygen built-in +# source browser. 
The htags tool is part of GNU's global source tagging system +# (see http://www.gnu.org/software/global/global.html). You will need version +# 4.8.6 or higher. +# +# To use it do the following: +# - Install the latest version of global +# - Enable SOURCE_BROWSER and USE_HTAGS in the config file +# - Make sure the INPUT points to the root of the source tree +# - Run doxygen as normal +# +# Doxygen will invoke htags (and that will in turn invoke gtags), so these +# tools must be available from the command line (i.e. in the search path). +# +# The result: instead of the source browser generated by doxygen, the links to +# source code will now point to the output of htags. +# The default value is: NO. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a +# verbatim copy of the header file for each class for which an include is +# specified. Set to NO to disable this. +# See also: Section \class. +# The default value is: YES. + +VERBATIM_HEADERS = NO + +# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the +# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the +# cost of reduced performance. This can be particularly helpful with template +# rich C++ code for which doxygen's built-in parser lacks the necessary type +# information. +# Note: The availability of this option depends on whether or not doxygen was +# generated with the -Duse-libclang=ON option for CMake. +# The default value is: NO. + +CLANG_ASSISTED_PARSING = NO + +# If clang assisted parsing is enabled you can provide the compiler with command +# line options that you would normally use when invoking the compiler. Note that +# the include paths will already be set by doxygen for the files and directories +# specified with INPUT and INCLUDE_PATH. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES.
+ +CLANG_OPTIONS = + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all +# compounds will be generated. Enable this if the project contains a lot of +# classes, structs, unions or interfaces. +# The default value is: YES. + +ALPHABETICAL_INDEX = YES + +# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in +# which the alphabetical index list will be split. +# Minimum value: 1, maximum value: 20, default value: 5. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all classes will +# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag +# can be used to specify a prefix (or a list of prefixes) that should be ignored +# while generating the index headers. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output +# The default value is: YES. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each +# generated HTML page (for example: .htm, .php, .asp). +# The default value is: .html. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a user-defined HTML header file for +# each generated HTML page. If the tag is left blank doxygen will generate a +# standard header. +# +# To get valid HTML the header file that includes any scripts and style sheets +# that doxygen needs, which is dependent on the configuration options used (e.g. +# the setting GENERATE_TREEVIEW). It is highly recommended to start with a +# default header using +# doxygen -w html new_header.html new_footer.html new_stylesheet.css +# YourConfigFile +# and then modify the file new_header.html. See also section "Doxygen usage" +# for information on how to generate the default header that doxygen normally +# uses. +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. For a description +# of the possible markers and block names see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each +# generated HTML page. If the tag is left blank doxygen will generate a standard +# footer. See HTML_HEADER for more information on how to generate a default +# footer and what special commands can be used inside the footer. See also +# section "Doxygen usage" for information on how to generate the default footer +# that doxygen normally uses. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style +# sheet that is used by each HTML page. It can be used to fine-tune the look of +# the HTML output. If left blank doxygen will generate a default style sheet. +# See also section "Doxygen usage" for information on how to generate the style +# sheet that doxygen normally uses. 
+# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as +# it is more robust and this tag (HTML_STYLESHEET) will in the future become +# obsolete. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined +# cascading style sheets that are included after the standard style sheets +# created by doxygen. Using this option one can overrule certain style aspects. +# This is preferred over using HTML_STYLESHEET since it does not replace the +# standard style sheet and is therefore more robust against future updates. +# Doxygen will copy the style sheet files to the output directory. +# Note: The order of the extra style sheet files is of importance (e.g. the last +# style sheet in the list overrules the setting of the previous ones in the +# list). For an example see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that the +# files will be copied as-is; there are no commands or markers available. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_FILES = @PROJECT_SOURCE_DIR@/Documentation/tnl-logo.jpg + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the style sheet and background images according to +# this color. Hue is specified as an angle on a colorwheel, see +# http://en.wikipedia.org/wiki/Hue for more information. 
For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. +# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors +# in the HTML output. For a value of 0 the output will use grayscales only. A +# value of 255 will produce the most vivid colors. +# Minimum value: 0, maximum value: 255, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the +# luminance component of the colors in the HTML output. Values below 100 +# gradually make the output lighter, whereas values above 100 make the output +# darker. The value divided by 100 is the actual gamma applied, so 80 represents +# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not +# change the gamma. +# Minimum value: 40, maximum value: 240, default value: 80. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting this +# to YES can help to show when doxygen was last run and thus if the +# documentation is up to date. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_TIMESTAMP = NO + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries +# shown in the various tree structured indices initially; the user can expand +# and collapse entries dynamically later on. Doxygen will expand the tree to +# such a level that at most the specified number of entries are visible (unless +# a fully collapsed tree already exceeds this amount). So setting the number of +# entries 1 will produce a full collapsed tree by default. 0 is a special value +# representing an infinite number of entries and will result in a full expanded +# tree by default. +# Minimum value: 0, maximum value: 9999, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files will be +# generated that can be used as input for Apple's Xcode 3 integrated development +# environment (see: http://developer.apple.com/tools/xcode/), introduced with +# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a +# Makefile in the HTML output directory. Running make will produce the docset in +# that directory and running make install will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at +# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_DOCSET = NO + +# This tag determines the name of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# The default value is: Doxygen generated docs. +# This tag requires that the tag GENERATE_DOCSET is set to YES. 
+ +DOCSET_FEEDNAME = "Doxygen generated docs" + +# This tag specifies a string that should uniquely identify the documentation +# set bundle. This should be a reverse domain-name style string, e.g. +# com.mycompany.MyDocSet. Doxygen will append .docset to the name. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. +# The default value is: org.doxygen.Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. +# The default value is: Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three +# additional HTML index files: index.hhp, index.hhc, and index.hhk. The +# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop +# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on +# Windows. +# +# The HTML Help Workshop contains a compiler that can convert all HTML output +# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML +# files are now used as the Windows 98 help format, and will replace the old +# Windows help format (.hlp) on all Windows platforms in the future. Compressed +# HTML files also contain an index, a table of contents, and you can search for +# words in the documentation. The HTML workshop also contains a viewer for +# compressed HTML files. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +GENERATE_HTMLHELP = NO + +# The CHM_FILE tag can be used to specify the file name of the resulting .chm +# file. You can add a path in front of the file if the result should not be +# written to the html output directory. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_FILE = + +# The HHC_LOCATION tag can be used to specify the location (absolute path +# including file name) of the HTML help compiler (hhc.exe). If non-empty, +# doxygen will try to run the HTML help compiler on the generated index.hhp. +# The file has to be specified with full path. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +HHC_LOCATION = + +# The GENERATE_CHI flag controls if a separate .chi index file is generated +# (YES) or that it should be included in the master .chm file (NO). +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +GENERATE_CHI = NO + +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) +# and project file content. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_INDEX_ENCODING = + +# The BINARY_TOC flag controls whether a binary table of contents is generated +# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it +# enables the Previous and Next buttons. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members to +# the table of contents of the HTML help documentation and to the tree view. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
+ +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that +# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help +# (.qch) of the generated HTML documentation. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify +# the file name of the resulting .qch file. The path specified is relative to +# the HTML output folder. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help +# Project output. For more information please see Qt Help Project / Namespace +# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt +# Help Project output. For more information please see Qt Help Project / Virtual +# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- +# folders). +# The default value is: doc. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_VIRTUAL_FOLDER = doc + +# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom +# filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. 
For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. Qt Help Project / Filter Attributes (see: +# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_SECT_FILTER_ATTRS = + +# The QHG_LOCATION tag can be used to specify the location of Qt's +# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the +# generated .qhp file. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be +# generated; together with the HTML files, they form an Eclipse help plugin. To +# install this plugin and make it available under the help contents menu in +# Eclipse, the contents of the directory containing the HTML and XML files need +# to be copied into the plugins directory of Eclipse. The name of the directory +# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. +# After copying, Eclipse needs to be restarted before the help appears. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the Eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have this +# name. Each documentation set should have its own identifier. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.
+ +ECLIPSE_DOC_ID = org.doxygen.Project + +# If you want full control over the layout of the generated HTML pages it might +# be necessary to disable the index and replace it with your own. The +# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top +# of each HTML page. A value of NO enables the index and the value YES disables +# it. Since the tabs in the index contain the same information as the navigation +# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. If the tag +# value is set to YES, a side panel will be generated containing a tree-like +# index structure (just like the one that is generated for HTML Help). For this +# to work a browser that supports JavaScript, DHTML, CSS and frames is required +# (i.e. any modern browser). Windows users are probably better off using the +# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can +# further fine-tune the look of the index. As an example, the default style +# sheet generated by doxygen has an example that shows how to put an image at +# the root of the tree instead of the PROJECT_NAME. Since the tree basically has +# the same information as the tab index, you could consider setting +# DISABLE_INDEX to YES when enabling this option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_TREEVIEW = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that +# doxygen will group on one line in the generated HTML documentation. +# +# Note that a value of 0 will completely suppress the enum values from appearing +# in the overview section. +# Minimum value: 0, maximum value: 20, default value: 4. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used +# to set the initial width (in pixels) of the frame in which the tree is shown. +# Minimum value: 0, maximum value: 1500, default value: 250. +# This tag requires that the tag GENERATE_HTML is set to YES. + +TREEVIEW_WIDTH = 250 + +# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to +# external symbols imported via tag files in a separate window. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of LaTeX formulas included as images in +# the HTML documentation. When you change the font size after a successful +# doxygen run you need to manually remove any form_*.png images from the HTML +# output directory to force them to be regenerated. +# Minimum value: 8, maximum value: 50, default value: 10. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are not +# supported properly for IE 6.0, but are supported on all modern browsers. +# +# Note that when changing this option you need to delete any form_*.png files in +# the HTML output directory before the changes have effect. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see +# http://www.mathjax.org) which uses client side Javascript for the rendering +# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX +# installed or if you want formulas to look prettier in the HTML output.
When +# enabled you may also need to install MathJax separately and configure the path +# to it using the MATHJAX_RELPATH option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +USE_MATHJAX = NO + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. See the MathJax site (see: +# http://docs.mathjax.org/en/latest/output.html) for more details. +# Possible values are: HTML-CSS (which is slower, but has the best +# compatibility), NativeMML (i.e. MathML) and SVG. +# The default value is: HTML-CSS. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the HTML +# output directory using the MATHJAX_RELPATH option. The destination directory +# should contain the MathJax.js script. For instance, if the mathjax directory +# is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax +# Content Delivery Network so you can quickly see the result without installing +# MathJax. However, it is strongly recommended to install a local copy of +# MathJax from http://www.mathjax.org before deployment. +# The default value is: http://cdn.mathjax.org/mathjax/latest. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax +# extension names that should be enabled during MathJax rendering. For example +# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_EXTENSIONS = + +# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces +# of code that will be used on startup of the MathJax code. 
See the MathJax site +# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an +# example see the documentation. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_CODEFILE = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box for +# the HTML output. The underlying search engine uses javascript and DHTML and +# should work on any modern browser. Note that when using HTML help +# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) +# there is already a search function so this one should typically be disabled. +# For large projects the javascript based search engine can be slow, then +# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to +# search using the keyboard; to jump to the search box use + S +# (what the is depends on the OS and browser, but it is typically +# , /