Loading src/core/cuda/CMakeLists.txt +2 −0 Original line number Diff line number Diff line Loading @@ -20,6 +20,7 @@ IF( BUILD_CUDA ) ${CURRENT_DIR}/cuda-reduction-abs-max_impl.cu ${CURRENT_DIR}/cuda-reduction-and_impl.cu ${CURRENT_DIR}/cuda-reduction-or_impl.cu ${CURRENT_DIR}/cuda-reduction-l2-norm_impl.cu ${CURRENT_DIR}/cuda-reduction-lp-norm_impl.cu ${CURRENT_DIR}/cuda-reduction-equalities_impl.cu ${CURRENT_DIR}/cuda-reduction-inequalities_impl.cu Loading @@ -30,6 +31,7 @@ IF( BUILD_CUDA ) ${CURRENT_DIR}/cuda-reduction-diff-abs-sum_impl.cu ${CURRENT_DIR}/cuda-reduction-diff-abs-min_impl.cu ${CURRENT_DIR}/cuda-reduction-diff-abs-max_impl.cu ${CURRENT_DIR}/cuda-reduction-diff-l2-norm_impl.cu ${CURRENT_DIR}/cuda-reduction-diff-lp-norm_impl.cu ${CURRENT_DIR}/cuda-prefix-sum_impl.cu PARENT_SCOPE ) Loading src/core/cuda/cuda-reduction-diff-l2-norm_impl.cu 0 → 100644 +87 −0 Original line number Diff line number Diff line /*************************************************************************** cuda-reduction-diff-l2-norm_impl.cu - description ------------------- begin : Jan 19, 2014 copyright : (C) 2014 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version.
* * * ***************************************************************************/ #include <core/cuda/reduction-operations.h> #include <core/cuda/cuda-reduction.h> #ifdef TEMPLATE_EXPLICIT_INSTANTIATION /**** * Diff L2 Norm */ template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< float, int > > ( const tnlParallelReductionDiffL2Norm< float, int >& operation, const typename tnlParallelReductionDiffL2Norm< float, int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< float, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< double, int > > ( const tnlParallelReductionDiffL2Norm< double, int>& operation, const typename tnlParallelReductionDiffL2Norm< double, int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< double, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< double, int> :: ResultType& result ); #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< long double, int > > ( const tnlParallelReductionDiffL2Norm< long double, int>& operation, const typename tnlParallelReductionDiffL2Norm< long double, int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< long double, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< long double, int> :: ResultType& result ); #endif #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< char, long int > > ( const tnlParallelReductionDiffL2Norm< char, long int >& operation, const typename tnlParallelReductionDiffL2Norm< char, long int > :: 
IndexType size, const typename tnlParallelReductionDiffL2Norm< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< char, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< char, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< int, long int > > ( const tnlParallelReductionDiffL2Norm< int, long int >& operation, const typename tnlParallelReductionDiffL2Norm< int, long int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< float, long int > > ( const tnlParallelReductionDiffL2Norm< float, long int >& operation, const typename tnlParallelReductionDiffL2Norm< float, long int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< double, long int > > ( const tnlParallelReductionDiffL2Norm< double, long int>& operation, const typename tnlParallelReductionDiffL2Norm< double, long int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< double, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< double, long int> :: ResultType& result ); #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< long double, long int > > ( const tnlParallelReductionDiffL2Norm< long double, long int>& 
operation, const typename tnlParallelReductionDiffL2Norm< long double, long int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< long double, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< long double, long int> :: ResultType& result ); #endif #endif #endif src/core/cuda/cuda-reduction-l2-norm_impl.cu 0 → 100644 +80 −0 Original line number Diff line number Diff line /*************************************************************************** cuda-reduction-l2-norm_impl.cu - description ------------------- begin : Jan 19, 2014 copyright : (C) 2014 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version.
* * * ***************************************************************************/ #include <core/cuda/reduction-operations.h> #include <core/cuda/cuda-reduction.h> #ifdef TEMPLATE_EXPLICIT_INSTANTIATION /**** * L2 Norm */ template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< float, int > > ( const tnlParallelReductionL2Norm< float, int >& operation, const typename tnlParallelReductionL2Norm< float, int > :: IndexType size, const typename tnlParallelReductionL2Norm< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< float, int > :: RealType* deviceInput2, typename tnlParallelReductionL2Norm< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< double, int > > ( const tnlParallelReductionL2Norm< double, int>& operation, const typename tnlParallelReductionL2Norm< double, int > :: IndexType size, const typename tnlParallelReductionL2Norm< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< double, int > :: RealType* deviceInput2, typename tnlParallelReductionL2Norm< double, int> :: ResultType& result ); #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< long double, int > > ( const tnlParallelReductionL2Norm< long double, int>& operation, const typename tnlParallelReductionL2Norm< long double, int > :: IndexType size, const typename tnlParallelReductionL2Norm< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< long double, int > :: RealType* deviceInput2, typename tnlParallelReductionL2Norm< long double, int> :: ResultType& result ); #endif #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< int, long int > > ( const tnlParallelReductionL2Norm< int, long int >& operation, const typename tnlParallelReductionL2Norm< int, long int > :: IndexType size, const typename tnlParallelReductionL2Norm< int, long int > :: RealType* 
deviceInput1, const typename tnlParallelReductionL2Norm< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionL2Norm< int, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< float, long int > > ( const tnlParallelReductionL2Norm< float, long int >& operation, const typename tnlParallelReductionL2Norm< float, long int > :: IndexType size, const typename tnlParallelReductionL2Norm< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionL2Norm< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< double, long int > > ( const tnlParallelReductionL2Norm< double, long int>& operation, const typename tnlParallelReductionL2Norm< double, long int > :: IndexType size, const typename tnlParallelReductionL2Norm< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< double, long int > :: RealType* deviceInput2, typename tnlParallelReductionL2Norm< double, long int> :: ResultType& result ); #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< long double, long int > > ( const tnlParallelReductionL2Norm< long double, long int>& operation, const typename tnlParallelReductionL2Norm< long double, long int > :: IndexType size, const typename tnlParallelReductionL2Norm< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< long double, long int > :: RealType* deviceInput2, typename tnlParallelReductionL2Norm< long double, long int> :: ResultType& result ); #endif #endif #endif No newline at end of file src/core/cuda/reduction-operations.h +60 −0 Original line number Diff line number Diff line Loading @@ -501,6 +501,35 @@ class tnlParallelReductionAbsMax : public tnlParallelReductionMax< Real, Index > } }; template< typename Real, typename Index 
> class tnlParallelReductionL2Norm : public tnlParallelReductionSum< Real, Index > { public: typedef Real RealType; typedef Index IndexType; typedef Real ResultType; typedef tnlParallelReductionSum< Real, Index > LaterReductionOperation; ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, const RealType* data2 ) const { return current + data1[ idx ] * data1[ idx ]; }; __cuda_callable__ ResultType initialValue() const { return ( ResultType ) 0; }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, const RealType* data2 ) const { result += data1[ index ] * data1[ index ]; } }; template< typename Real, typename Index > class tnlParallelReductionLpNorm : public tnlParallelReductionSum< Real, Index > Loading Loading @@ -801,6 +830,37 @@ class tnlParallelReductionDiffAbsMax : public tnlParallelReductionMax< Real, Ind } }; template< typename Real, typename Index > class tnlParallelReductionDiffL2Norm : public tnlParallelReductionSum< Real, Index > { public: typedef Real RealType; typedef Index IndexType; typedef Real ResultType; typedef tnlParallelReductionSum< Real, Index > LaterReductionOperation; ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, const RealType* data2 ) const { const RealType aux( data2[ idx ] - data1[ idx ] ); return current + aux * aux; }; __cuda_callable__ ResultType initialValue() const { return ( ResultType ) 0; }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, const RealType* data2 ) const { const RealType aux( data2[ index ] - data1[ index ] ); result += aux * aux; } }; template< typename Real, typename Index > class tnlParallelReductionDiffLpNorm : public tnlParallelReductionSum< Real, Index > { Loading src/core/vectors/tnlVectorOperations.h +34 −3 Original line number Diff line number Diff line Loading @@ -54,6 +54,12 @@ class 
tnlVectorOperations< tnlHost > template< typename Vector > static typename Vector::RealType getVectorAbsMin( const Vector& v ); template< typename Vector > static typename Vector::RealType getVectorL1Norm( const Vector& v ); template< typename Vector > static typename Vector::RealType getVectorL2Norm( const Vector& v ); template< typename Vector > static typename Vector::RealType getVectorLpNorm( const Vector& v, const typename Vector::RealType& p ); Loading @@ -77,6 +83,14 @@ class tnlVectorOperations< tnlHost > static typename Vector1::RealType getVectorDifferenceAbsMin( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2 > static typename Vector1::RealType getVectorDifferenceL1Norm( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2 > static typename Vector1::RealType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2 > static typename Vector1::RealType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, Loading @@ -85,6 +99,8 @@ class tnlVectorOperations< tnlHost > template< typename Vector1, typename Vector2 > static typename Vector1::RealType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 ); template< typename Vector > static void vectorScalarMultiplication( Vector& v, const typename Vector::RealType& alpha ); Loading Loading @@ -147,6 +163,12 @@ class tnlVectorOperations< tnlCuda > template< typename Vector > static typename Vector::RealType getVectorAbsMin( const Vector& v ); template< typename Vector > static typename Vector::RealType getVectorL1Norm( const Vector& v ); template< typename Vector > static typename Vector::RealType getVectorL2Norm( const Vector& v ); template< typename Vector > static typename Vector::RealType getVectorLpNorm( const Vector& v, const typename Vector::RealType& p ); Loading @@ -170,6 +192,14 @@ class tnlVectorOperations< tnlCuda > static typename Vector1::RealType 
getVectorDifferenceAbsMin( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2 > static typename Vector1::RealType getVectorDifferenceL1Norm( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2 > static typename Vector1::RealType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2 > static typename Vector1::RealType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, Loading @@ -178,6 +208,7 @@ class tnlVectorOperations< tnlCuda > template< typename Vector1, typename Vector2 > static typename Vector1::RealType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 ); template< typename Vector > static void vectorScalarMultiplication( Vector& v, const typename Vector::RealType& alpha ); Loading Loading
src/core/cuda/CMakeLists.txt +2 −0 Original line number Diff line number Diff line Loading @@ -20,6 +20,7 @@ IF( BUILD_CUDA ) ${CURRENT_DIR}/cuda-reduction-abs-max_impl.cu ${CURRENT_DIR}/cuda-reduction-and_impl.cu ${CURRENT_DIR}/cuda-reduction-or_impl.cu ${CURRENT_DIR}/cuda-reduction-l2-norm_impl.cu ${CURRENT_DIR}/cuda-reduction-lp-norm_impl.cu ${CURRENT_DIR}/cuda-reduction-equalities_impl.cu ${CURRENT_DIR}/cuda-reduction-inequalities_impl.cu Loading @@ -30,6 +31,7 @@ IF( BUILD_CUDA ) ${CURRENT_DIR}/cuda-reduction-diff-abs-sum_impl.cu ${CURRENT_DIR}/cuda-reduction-diff-abs-min_impl.cu ${CURRENT_DIR}/cuda-reduction-diff-abs-max_impl.cu ${CURRENT_DIR}/cuda-reduction-diff-l2-norm_impl.cu ${CURRENT_DIR}/cuda-reduction-diff-lp-norm_impl.cu ${CURRENT_DIR}/cuda-prefix-sum_impl.cu PARENT_SCOPE ) Loading
src/core/cuda/cuda-reduction-diff-l2-norm_impl.cu 0 → 100644 +87 −0 Original line number Diff line number Diff line /*************************************************************************** cuda-reduction-diff-l2-norm_impl.cu - description ------------------- begin : Jan 19, 2014 copyright : (C) 2014 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * ***************************************************************************/ #include <core/cuda/reduction-operations.h> #include <core/cuda/cuda-reduction.h> #ifdef TEMPLATE_EXPLICIT_INSTANTIATION /**** * Diff L2 Norm * * Explicit instantiations of reductionOnCudaDevice for the operation tnlParallelReductionDiffL2Norm. * Everything below is compiled only when TEMPLATE_EXPLICIT_INSTANTIATION is defined. * The < float, int > and < double, int > variants are always emitted inside that guard; * the long double variants additionally require INSTANTIATE_LONG_DOUBLE and the variants * with a long int index type additionally require INSTANTIATE_LONG_INT. */ template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< float, int > > ( const tnlParallelReductionDiffL2Norm< float, int >& operation, const typename tnlParallelReductionDiffL2Norm< float, int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< float, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< double, int > > ( const tnlParallelReductionDiffL2Norm< double, int>& operation, const typename tnlParallelReductionDiffL2Norm< double, int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< double, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< double, int> :: ResultType& result ); #ifdef INSTANTIATE_LONG_DOUBLE
template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< long double, int > > ( const tnlParallelReductionDiffL2Norm< long double, int>& operation, const typename tnlParallelReductionDiffL2Norm< long double, int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< long double, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< long double, int> :: ResultType& result ); #endif #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< char, long int > > ( const tnlParallelReductionDiffL2Norm< char, long int >& operation, const typename tnlParallelReductionDiffL2Norm< char, long int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< char, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< char, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< int, long int > > ( const tnlParallelReductionDiffL2Norm< int, long int >& operation, const typename tnlParallelReductionDiffL2Norm< int, long int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< float, long int > > ( const tnlParallelReductionDiffL2Norm< float, long int >& operation, const typename tnlParallelReductionDiffL2Norm< float, long int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< float, long int > :: RealType* deviceInput2, typename
tnlParallelReductionDiffL2Norm< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< double, long int > > ( const tnlParallelReductionDiffL2Norm< double, long int>& operation, const typename tnlParallelReductionDiffL2Norm< double, long int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< double, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< double, long int> :: ResultType& result ); #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< long double, long int > > ( const tnlParallelReductionDiffL2Norm< long double, long int>& operation, const typename tnlParallelReductionDiffL2Norm< long double, long int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< long double, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< long double, long int> :: ResultType& result ); #endif #endif #endif
src/core/cuda/cuda-reduction-l2-norm_impl.cu 0 → 100644 +80 −0 Original line number Diff line number Diff line /*************************************************************************** cuda-reduction-l2-norm_impl.cu - description ------------------- begin : Jan 19, 2014 copyright : (C) 2014 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * ***************************************************************************/ #include <core/cuda/reduction-operations.h> #include <core/cuda/cuda-reduction.h> #ifdef TEMPLATE_EXPLICIT_INSTANTIATION /**** * L2 Norm * * Explicit instantiations of reductionOnCudaDevice for the operation tnlParallelReductionL2Norm. * Everything below is compiled only when TEMPLATE_EXPLICIT_INSTANTIATION is defined. * The < float, int > and < double, int > variants are always emitted inside that guard; * the long double variants additionally require INSTANTIATE_LONG_DOUBLE and the variants * with a long int index type additionally require INSTANTIATE_LONG_INT. * * NOTE(review): the long int section here instantiates int, float, double and long double * but no < char, long int > variant, whereas the Diff L2 Norm instantiation list has one - * confirm whether the omission is intentional. */ template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< float, int > > ( const tnlParallelReductionL2Norm< float, int >& operation, const typename tnlParallelReductionL2Norm< float, int > :: IndexType size, const typename tnlParallelReductionL2Norm< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< float, int > :: RealType* deviceInput2, typename tnlParallelReductionL2Norm< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< double, int > > ( const tnlParallelReductionL2Norm< double, int>& operation, const typename tnlParallelReductionL2Norm< double, int > :: IndexType size, const typename tnlParallelReductionL2Norm< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< double, int > :: RealType* deviceInput2, typename tnlParallelReductionL2Norm< double, int> :: ResultType& result ); #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionL2Norm<
long double, int > > ( const tnlParallelReductionL2Norm< long double, int>& operation, const typename tnlParallelReductionL2Norm< long double, int > :: IndexType size, const typename tnlParallelReductionL2Norm< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< long double, int > :: RealType* deviceInput2, typename tnlParallelReductionL2Norm< long double, int> :: ResultType& result ); #endif #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< int, long int > > ( const tnlParallelReductionL2Norm< int, long int >& operation, const typename tnlParallelReductionL2Norm< int, long int > :: IndexType size, const typename tnlParallelReductionL2Norm< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionL2Norm< int, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< float, long int > > ( const tnlParallelReductionL2Norm< float, long int >& operation, const typename tnlParallelReductionL2Norm< float, long int > :: IndexType size, const typename tnlParallelReductionL2Norm< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionL2Norm< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< double, long int > > ( const tnlParallelReductionL2Norm< double, long int>& operation, const typename tnlParallelReductionL2Norm< double, long int > :: IndexType size, const typename tnlParallelReductionL2Norm< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< double, long int > :: RealType* deviceInput2, typename tnlParallelReductionL2Norm< double, long int> :: ResultType& result ); #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice<
tnlParallelReductionL2Norm< long double, long int > > ( const tnlParallelReductionL2Norm< long double, long int>& operation, const typename tnlParallelReductionL2Norm< long double, long int > :: IndexType size, const typename tnlParallelReductionL2Norm< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< long double, long int > :: RealType* deviceInput2, typename tnlParallelReductionL2Norm< long double, long int> :: ResultType& result ); #endif #endif #endif No newline at end of file
src/core/cuda/reduction-operations.h +60 −0 Original line number Diff line number Diff line Loading @@ -501,6 +501,35 @@ class tnlParallelReductionAbsMax : public tnlParallelReductionMax< Real, Index > } }; /* Reduction operation that accumulates the sum of squares of the elements of data1; data2 is accepted but never read. Later reduction stages are declared as a plain tnlParallelReductionSum. No square root is taken in this class - NOTE(review): presumably the caller applies sqrt to obtain the L2 norm; confirm. */ template< typename Real, typename Index > class tnlParallelReductionL2Norm : public tnlParallelReductionSum< Real, Index > { public: typedef Real RealType; typedef Index IndexType; typedef Real ResultType; typedef tnlParallelReductionSum< Real, Index > LaterReductionOperation; /* Host-side step: returns current plus the square of data1[ idx ]. */ ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, const RealType* data2 ) const { return current + data1[ idx ] * data1[ idx ]; }; /* Starting value of the accumulator: zero cast to ResultType. */ __cuda_callable__ ResultType initialValue() const { return ( ResultType ) 0; }; /* First-stage CUDA reduction step: adds the square of data1[ index ] to result in place. */ __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, const RealType* data2 ) const { result += data1[ index ] * data1[ index ]; } }; template< typename Real, typename Index > class tnlParallelReductionLpNorm : public tnlParallelReductionSum< Real, Index > Loading Loading @@ -801,6 +830,37 @@ class tnlParallelReductionDiffAbsMax : public tnlParallelReductionMax< Real, Ind } }; /* Reduction operation that accumulates the sum of squared element-wise differences data2[ i ] - data1[ i ]. Later reduction stages are declared as a plain tnlParallelReductionSum. No square root is taken in this class - NOTE(review): presumably the caller applies sqrt to obtain the L2 norm of the difference; confirm. */ template< typename Real, typename Index > class tnlParallelReductionDiffL2Norm : public tnlParallelReductionSum< Real, Index > { public: typedef Real RealType; typedef Index IndexType; typedef Real ResultType; typedef tnlParallelReductionSum< Real, Index > LaterReductionOperation; /* Host-side step: returns current plus the square of ( data2[ idx ] - data1[ idx ] ). */ ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, const RealType* data2 ) const { const RealType aux( data2[ idx ] - data1[ idx ] ); return current + aux * aux; }; /* Starting value of the accumulator: zero cast to ResultType. */ __cuda_callable__ ResultType initialValue() const { return ( ResultType ) 0; }; /* First-stage CUDA reduction step: adds the square of ( data2[ index ] - data1[ index ] ) to result in place. */ __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, const RealType* data2 ) const { const RealType aux( data2[ index ] - data1[ index ] ); result += aux * aux; } }; template< typename Real,
typename Index > class tnlParallelReductionDiffLpNorm : public tnlParallelReductionSum< Real, Index > { Loading
src/core/vectors/tnlVectorOperations.h +34 −3 Original line number Diff line number Diff line Loading @@ -54,6 +54,12 @@ class tnlVectorOperations< tnlHost > template< typename Vector > static typename Vector::RealType getVectorAbsMin( const Vector& v ); template< typename Vector > static typename Vector::RealType getVectorL1Norm( const Vector& v ); template< typename Vector > static typename Vector::RealType getVectorL2Norm( const Vector& v ); template< typename Vector > static typename Vector::RealType getVectorLpNorm( const Vector& v, const typename Vector::RealType& p ); Loading @@ -77,6 +83,14 @@ class tnlVectorOperations< tnlHost > static typename Vector1::RealType getVectorDifferenceAbsMin( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2 > static typename Vector1::RealType getVectorDifferenceL1Norm( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2 > static typename Vector1::RealType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2 > static typename Vector1::RealType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, Loading @@ -85,6 +99,8 @@ class tnlVectorOperations< tnlHost > template< typename Vector1, typename Vector2 > static typename Vector1::RealType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 ); template< typename Vector > static void vectorScalarMultiplication( Vector& v, const typename Vector::RealType& alpha ); Loading Loading @@ -147,6 +163,12 @@ class tnlVectorOperations< tnlCuda > template< typename Vector > static typename Vector::RealType getVectorAbsMin( const Vector& v ); template< typename Vector > static typename Vector::RealType getVectorL1Norm( const Vector& v ); template< typename Vector > static typename Vector::RealType getVectorL2Norm( const Vector& v ); template< typename Vector > static typename Vector::RealType getVectorLpNorm( const Vector& v, const typename 
Vector::RealType& p ); Loading @@ -170,6 +192,14 @@ class tnlVectorOperations< tnlCuda > static typename Vector1::RealType getVectorDifferenceAbsMin( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2 > static typename Vector1::RealType getVectorDifferenceL1Norm( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2 > static typename Vector1::RealType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2 > static typename Vector1::RealType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, Loading @@ -178,6 +208,7 @@ class tnlVectorOperations< tnlCuda > template< typename Vector1, typename Vector2 > static typename Vector1::RealType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 ); template< typename Vector > static void vectorScalarMultiplication( Vector& v, const typename Vector::RealType& alpha ); Loading