Loading src/TNL/Algorithms/CudaReductionBuffer.h +4 −3 Original line number Diff line number Diff line Loading @@ -17,6 +17,7 @@ #include <TNL/Cuda/CheckDevice.h> #include <TNL/Exceptions/CudaBadAlloc.h> #include <TNL/Exceptions/CudaSupportMissing.h> #include <TNL/Cuda/DeclareIfHaveCuda.h> namespace TNL { namespace Algorithms { Loading @@ -26,11 +27,11 @@ class CudaReductionBuffer public: inline static CudaReductionBuffer& getInstance() { static CudaReductionBuffer instance; static CudaReductionBuffer instance(0); return instance; } inline void setSize( size_t size ) inline void setSize( size_t DECLARE_IF_HAVE_CUDA( size ) ) { #ifdef HAVE_CUDA if( size > this->size ) Loading Loading @@ -59,7 +60,7 @@ class CudaReductionBuffer CudaReductionBuffer& operator=( CudaReductionBuffer const& copy ); // Not Implemented // private constructor of the singleton inline CudaReductionBuffer( size_t size = 0 ) inline CudaReductionBuffer( size_t DECLARE_IF_HAVE_CUDA( size = 0 ) ) { #ifdef HAVE_CUDA setSize( size ); Loading src/TNL/Algorithms/StaticFor.h +2 −1 Original line number Diff line number Diff line Loading @@ -11,6 +11,7 @@ #pragma once #include <TNL/Cuda/CudaCallable.h> #include <utility> namespace TNL { namespace Algorithms { Loading Loading @@ -65,7 +66,7 @@ struct StaticFor< End, End, true > { template< typename Function, typename... Args > __cuda_callable__ static void exec( const Function& f, Args&&... args ) {} static void exec( const Function& /*f*/, Args&&... /*args*/ ) {} }; template< int Begin, int End > Loading src/TNL/Assert.h +8 −4 Original line number Diff line number Diff line Loading @@ -297,9 +297,9 @@ cmpHelperTrue( const char* assertion, const char* function, int line, const char* expr1, const char* expr2, const char* /*expr2*/, const T1& val1, const T2& val2 ) const T2& /*val2*/ ) { // explicit cast is necessary, because T1::operator! might not be defined if( ! (bool) val1 ) Loading @@ -316,9 +316,9 @@ cmpHelperFalse( const char* assertion, const char* function, int line, const char* expr1, const char* expr2, const char* /*expr2*/, const T1& val1, const T2& val2 ) const T2& /*val2*/ ) { if( val1 ) ::TNL::Assert::cmpHelperOpFailure( assertion, message, file, function, line, Loading Loading @@ -376,6 +376,10 @@ TNL_IMPL_CMP_HELPER_( GT, > ); #define __TNL_PRETTY_FUNCTION __PRETTY_FUNCTION__ #endif #ifndef __STRING #define __STRING(arg) #arg #endif // Internal macro to compose the string representing the assertion. // We can't do it easily at runtime, because we have to support assertions // in CUDA kernels, which can't use std::string objects. Instead, we do it Loading src/TNL/Containers/Expressions/ExpressionTemplates.h +10 −6 Original line number Diff line number Diff line Loading @@ -430,19 +430,21 @@ operator*( const BinaryExpressionTemplate< L1, L2, LOperation >& a, template< typename T1, typename T2, typename RealType, template< typename, typename > class Operation > auto operator*( const BinaryExpressionTemplate< T1, T2, Operation >& a, const typename BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) const /*typename BinaryExpressionTemplate< T1, T2, Operation >::*/RealType& b ) { return BinaryExpressionTemplate< std::decay_t<decltype(a)>, std::decay_t<decltype(b)>, Multiplication >( a, b ); } template< typename T1, template< typename RealType, typename T1, typename T2, template< typename, typename > class Operation > auto operator*( const typename BinaryExpressionTemplate< T1, T2, Operation >::RealType& a, operator*( const /*typename BinaryExpressionTemplate< T1, T2, Operation >::*/RealType& a, // why is there the real type, why is the body of all functions the same const BinaryExpressionTemplate< T1, T2, Operation >& b ) { return BinaryExpressionTemplate< std::decay_t<decltype(a)>, std::decay_t<decltype(b)>, Multiplication >( a, b ); Loading Loading @@ -518,19 +520,21 @@ operator/( const BinaryExpressionTemplate< L1, L2, LOperation >& a, template< typename T1, typename T2, typename RealType, template< typename, typename > class Operation > auto operator/( const BinaryExpressionTemplate< T1, T2, Operation >& a, const typename BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) const /*typename BinaryExpressionTemplate< T1, T2, Operation >::*/RealType& b ) { return BinaryExpressionTemplate< std::decay_t<decltype(a)>, std::decay_t<decltype(b)>, Division >( a, b ); } template< typename T1, template< typename RealType, typename T1, typename T2, template< typename, typename > class Operation > auto operator/( const typename BinaryExpressionTemplate< T1, T2, Operation >::RealType& a, operator/( const /*typename BinaryExpressionTemplate< T1, T2, Operation >::*/RealType& a, const BinaryExpressionTemplate< T1, T2, Operation >& b ) { return BinaryExpressionTemplate< std::decay_t<decltype(a)>, std::decay_t<decltype(b)>, Division >( a, b ); Loading src/TNL/Containers/Expressions/HorizontalOperations.h +20 −0 Original line number Diff line number Diff line Loading @@ -24,6 +24,26 @@ struct Addition { return a + b; } __cuda_callable__ static auto evaluate( const T1& a, T2&& b ) -> decltype( a + b ) { return a + std::forward<T2>(b); } __cuda_callable__ static auto evaluate( T1&& a, const T2& b ) -> decltype( a + b ) { return std::forward<T1>(a) + b; } __cuda_callable__ static auto evaluate( T1&& a, T2&& b ) -> decltype( a + b ) { return std::forward<T1>(a) + std::forward<T1>(b); } }; template< typename T1, typename T2 > Loading Loading
src/TNL/Algorithms/CudaReductionBuffer.h +4 −3 Original line number Diff line number Diff line Loading @@ -17,6 +17,7 @@ #include <TNL/Cuda/CheckDevice.h> #include <TNL/Exceptions/CudaBadAlloc.h> #include <TNL/Exceptions/CudaSupportMissing.h> #include <TNL/Cuda/DeclareIfHaveCuda.h> namespace TNL { namespace Algorithms { Loading @@ -26,11 +27,11 @@ class CudaReductionBuffer public: inline static CudaReductionBuffer& getInstance() { static CudaReductionBuffer instance; static CudaReductionBuffer instance(0); return instance; } inline void setSize( size_t size ) inline void setSize( size_t DECLARE_IF_HAVE_CUDA( size ) ) { #ifdef HAVE_CUDA if( size > this->size ) Loading Loading @@ -59,7 +60,7 @@ class CudaReductionBuffer CudaReductionBuffer& operator=( CudaReductionBuffer const& copy ); // Not Implemented // private constructor of the singleton inline CudaReductionBuffer( size_t size = 0 ) inline CudaReductionBuffer( size_t DECLARE_IF_HAVE_CUDA( size = 0 ) ) { #ifdef HAVE_CUDA setSize( size ); Loading
src/TNL/Algorithms/StaticFor.h +2 −1 Original line number Diff line number Diff line Loading @@ -11,6 +11,7 @@ #pragma once #include <TNL/Cuda/CudaCallable.h> #include <utility> namespace TNL { namespace Algorithms { Loading Loading @@ -65,7 +66,7 @@ struct StaticFor< End, End, true > { template< typename Function, typename... Args > __cuda_callable__ static void exec( const Function& f, Args&&... args ) {} static void exec( const Function& /*f*/, Args&&... /*args*/ ) {} }; template< int Begin, int End > Loading
src/TNL/Assert.h +8 −4 Original line number Diff line number Diff line Loading @@ -297,9 +297,9 @@ cmpHelperTrue( const char* assertion, const char* function, int line, const char* expr1, const char* expr2, const char* /*expr2*/, const T1& val1, const T2& val2 ) const T2& /*val2*/ ) { // explicit cast is necessary, because T1::operator! might not be defined if( ! (bool) val1 ) Loading @@ -316,9 +316,9 @@ cmpHelperFalse( const char* assertion, const char* function, int line, const char* expr1, const char* expr2, const char* /*expr2*/, const T1& val1, const T2& val2 ) const T2& /*val2*/ ) { if( val1 ) ::TNL::Assert::cmpHelperOpFailure( assertion, message, file, function, line, Loading Loading @@ -376,6 +376,10 @@ TNL_IMPL_CMP_HELPER_( GT, > ); #define __TNL_PRETTY_FUNCTION __PRETTY_FUNCTION__ #endif #ifndef __STRING #define __STRING(arg) #arg #endif // Internal macro to compose the string representing the assertion. // We can't do it easily at runtime, because we have to support assertions // in CUDA kernels, which can't use std::string objects. Instead, we do it Loading
src/TNL/Containers/Expressions/ExpressionTemplates.h +10 −6 Original line number Diff line number Diff line Loading @@ -430,19 +430,21 @@ operator*( const BinaryExpressionTemplate< L1, L2, LOperation >& a, template< typename T1, typename T2, typename RealType, template< typename, typename > class Operation > auto operator*( const BinaryExpressionTemplate< T1, T2, Operation >& a, const typename BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) const /*typename BinaryExpressionTemplate< T1, T2, Operation >::*/RealType& b ) { return BinaryExpressionTemplate< std::decay_t<decltype(a)>, std::decay_t<decltype(b)>, Multiplication >( a, b ); } template< typename T1, template< typename RealType, typename T1, typename T2, template< typename, typename > class Operation > auto operator*( const typename BinaryExpressionTemplate< T1, T2, Operation >::RealType& a, operator*( const /*typename BinaryExpressionTemplate< T1, T2, Operation >::*/RealType& a, // why is there the real type, why is the body of all functions the same const BinaryExpressionTemplate< T1, T2, Operation >& b ) { return BinaryExpressionTemplate< std::decay_t<decltype(a)>, std::decay_t<decltype(b)>, Multiplication >( a, b ); Loading Loading @@ -518,19 +520,21 @@ operator/( const BinaryExpressionTemplate< L1, L2, LOperation >& a, template< typename T1, typename T2, typename RealType, template< typename, typename > class Operation > auto operator/( const BinaryExpressionTemplate< T1, T2, Operation >& a, const typename BinaryExpressionTemplate< T1, T2, Operation >::RealType& b ) const /*typename BinaryExpressionTemplate< T1, T2, Operation >::*/RealType& b ) { return BinaryExpressionTemplate< std::decay_t<decltype(a)>, std::decay_t<decltype(b)>, Division >( a, b ); } template< typename T1, template< typename RealType, typename T1, typename T2, template< typename, typename > class Operation > auto operator/( const typename BinaryExpressionTemplate< T1, T2, Operation >::RealType& a, operator/( const /*typename BinaryExpressionTemplate< T1, T2, Operation >::*/RealType& a, const BinaryExpressionTemplate< T1, T2, Operation >& b ) { return BinaryExpressionTemplate< std::decay_t<decltype(a)>, std::decay_t<decltype(b)>, Division >( a, b ); Loading
src/TNL/Containers/Expressions/HorizontalOperations.h +20 −0 Original line number Diff line number Diff line Loading @@ -24,6 +24,26 @@ struct Addition { return a + b; } __cuda_callable__ static auto evaluate( const T1& a, T2&& b ) -> decltype( a + b ) { return a + std::forward<T2>(b); } __cuda_callable__ static auto evaluate( T1&& a, const T2& b ) -> decltype( a + b ) { return std::forward<T1>(a) + b; } __cuda_callable__ static auto evaluate( T1&& a, T2&& b ) -> decltype( a + b ) { return std::forward<T1>(a) + std::forward<T1>(b); } }; template< typename T1, typename T2 > Loading