Merge branch 'master' into mhfem (5e526c7f) · Commits · TNL / tnl-dev

src/core/cuda/CMakeLists.txt

+2 −0

Original line number	Diff line number	Diff line
		@@ -20,6 +20,7 @@ IF( BUILD_CUDA )
		${CURRENT_DIR}/cuda-reduction-abs-max_impl.cu
		${CURRENT_DIR}/cuda-reduction-and_impl.cu
		${CURRENT_DIR}/cuda-reduction-or_impl.cu
		${CURRENT_DIR}/cuda-reduction-l2-norm_impl.cu
		${CURRENT_DIR}/cuda-reduction-lp-norm_impl.cu
		${CURRENT_DIR}/cuda-reduction-equalities_impl.cu
		${CURRENT_DIR}/cuda-reduction-inequalities_impl.cu
		@@ -30,6 +31,7 @@ IF( BUILD_CUDA )
		${CURRENT_DIR}/cuda-reduction-diff-abs-sum_impl.cu
		${CURRENT_DIR}/cuda-reduction-diff-abs-min_impl.cu
		${CURRENT_DIR}/cuda-reduction-diff-abs-max_impl.cu
		${CURRENT_DIR}/cuda-reduction-diff-l2-norm_impl.cu
		${CURRENT_DIR}/cuda-reduction-diff-lp-norm_impl.cu
		${CURRENT_DIR}/cuda-prefix-sum_impl.cu
		PARENT_SCOPE )

src/core/cuda/cuda-reduction-diff-l2-norm_impl.cu

0 → 100644

+87 −0

Original line number	Diff line number	Diff line
		/***************************************************************************
		cuda-reduction-diff-l2-norm_impl.cu - description
		-------------------
		begin : Jan 19, 2014
		copyright : (C) 2014 by Tomas Oberhuber
		email : tomas.oberhuber@fjfi.cvut.cz
		***************************************************************************/

		/***************************************************************************
		* *
		* This program is free software; you can redistribute it and/or modify *
		* it under the terms of the GNU General Public License as published by *
		* the Free Software Foundation; either version 2 of the License, or *
		* (at your option) any later version. *
		* *
		***************************************************************************/

		#include <core/cuda/reduction-operations.h>
		#include <core/cuda/cuda-reduction.h>

		#ifdef TEMPLATE_EXPLICIT_INSTANTIATION

		/****
		 * Diff L2 Norm
		 *
		 * Explicit instantiations of reductionOnCudaDevice for the
		 * tnlParallelReductionDiffL2Norm operation.
		 *
		 * The helper macro expands to a single explicit instantiation for the
		 * given ( real type, index type ) pair. It is local to this section and
		 * is undefined again before the closing #endif.
		 *
		 * NOTE(review): the char and int real types are instantiated only
		 * together with the long int index type - confirm this is intended.
		 */
		#define TNL_INSTANTIATE_DIFF_L2_NORM_REDUCTION( REAL_TYPE, INDEX_TYPE ) \
		   template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< REAL_TYPE, INDEX_TYPE > > \
		   ( const tnlParallelReductionDiffL2Norm< REAL_TYPE, INDEX_TYPE >& operation, \
		     const typename tnlParallelReductionDiffL2Norm< REAL_TYPE, INDEX_TYPE > :: IndexType size, \
		     const typename tnlParallelReductionDiffL2Norm< REAL_TYPE, INDEX_TYPE > :: RealType* deviceInput1, \
		     const typename tnlParallelReductionDiffL2Norm< REAL_TYPE, INDEX_TYPE > :: RealType* deviceInput2, \
		     typename tnlParallelReductionDiffL2Norm< REAL_TYPE, INDEX_TYPE > :: ResultType& result )

		/****
		 * Index type: int
		 */
		TNL_INSTANTIATE_DIFF_L2_NORM_REDUCTION( float, int );
		TNL_INSTANTIATE_DIFF_L2_NORM_REDUCTION( double, int );
		#ifdef INSTANTIATE_LONG_DOUBLE
		TNL_INSTANTIATE_DIFF_L2_NORM_REDUCTION( long double, int );
		#endif

		/****
		 * Index type: long int
		 */
		#ifdef INSTANTIATE_LONG_INT
		TNL_INSTANTIATE_DIFF_L2_NORM_REDUCTION( char, long int );
		TNL_INSTANTIATE_DIFF_L2_NORM_REDUCTION( int, long int );
		TNL_INSTANTIATE_DIFF_L2_NORM_REDUCTION( float, long int );
		TNL_INSTANTIATE_DIFF_L2_NORM_REDUCTION( double, long int );
		#ifdef INSTANTIATE_LONG_DOUBLE
		TNL_INSTANTIATE_DIFF_L2_NORM_REDUCTION( long double, long int );
		#endif
		#endif

		#undef TNL_INSTANTIATE_DIFF_L2_NORM_REDUCTION

		#endif

src/core/cuda/cuda-reduction-l2-norm_impl.cu

0 → 100644

+80 −0

Original line number	Diff line number	Diff line
		/***************************************************************************
		cuda-reduction-l2-norm_impl.cu - description
		-------------------
		begin : Jan 19, 2014
		copyright : (C) 2014 by Tomas Oberhuber
		email : tomas.oberhuber@fjfi.cvut.cz
		***************************************************************************/

		/***************************************************************************
		* *
		* This program is free software; you can redistribute it and/or modify *
		* it under the terms of the GNU General Public License as published by *
		* the Free Software Foundation; either version 2 of the License, or *
		* (at your option) any later version. *
		* *
		***************************************************************************/

		#include <core/cuda/reduction-operations.h>
		#include <core/cuda/cuda-reduction.h>

		#ifdef TEMPLATE_EXPLICIT_INSTANTIATION

		/****
		 * L2 Norm
		 *
		 * Explicit instantiations of reductionOnCudaDevice for the
		 * tnlParallelReductionL2Norm operation.
		 *
		 * The helper macro expands to a single explicit instantiation for the
		 * given ( real type, index type ) pair. It is local to this section and
		 * is undefined again before the closing #endif.
		 *
		 * NOTE(review): the int real type is instantiated only together with
		 * the long int index type - confirm this is intended.
		 */
		#define TNL_INSTANTIATE_L2_NORM_REDUCTION( REAL_TYPE, INDEX_TYPE ) \
		   template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< REAL_TYPE, INDEX_TYPE > > \
		   ( const tnlParallelReductionL2Norm< REAL_TYPE, INDEX_TYPE >& operation, \
		     const typename tnlParallelReductionL2Norm< REAL_TYPE, INDEX_TYPE > :: IndexType size, \
		     const typename tnlParallelReductionL2Norm< REAL_TYPE, INDEX_TYPE > :: RealType* deviceInput1, \
		     const typename tnlParallelReductionL2Norm< REAL_TYPE, INDEX_TYPE > :: RealType* deviceInput2, \
		     typename tnlParallelReductionL2Norm< REAL_TYPE, INDEX_TYPE > :: ResultType& result )

		/****
		 * Index type: int
		 */
		TNL_INSTANTIATE_L2_NORM_REDUCTION( float, int );
		TNL_INSTANTIATE_L2_NORM_REDUCTION( double, int );
		#ifdef INSTANTIATE_LONG_DOUBLE
		TNL_INSTANTIATE_L2_NORM_REDUCTION( long double, int );
		#endif

		/****
		 * Index type: long int
		 */
		#ifdef INSTANTIATE_LONG_INT
		TNL_INSTANTIATE_L2_NORM_REDUCTION( int, long int );
		TNL_INSTANTIATE_L2_NORM_REDUCTION( float, long int );
		TNL_INSTANTIATE_L2_NORM_REDUCTION( double, long int );
		#ifdef INSTANTIATE_LONG_DOUBLE
		TNL_INSTANTIATE_L2_NORM_REDUCTION( long double, long int );
		#endif
		#endif

		#undef TNL_INSTANTIATE_L2_NORM_REDUCTION

		#endif
		No newline at end of file

src/core/cuda/reduction-operations.h

+60 −0

Original line number	Diff line number	Diff line
		@@ -501,6 +501,35 @@ class tnlParallelReductionAbsMax : public tnlParallelReductionMax< Real, Index >
		}
		};

		/****
		 * Reduction operation that accumulates the squares of the elements of
		 * the first input array. The second input array is accepted but never
		 * read.
		 *
		 * Only the sum of squares is produced here; no square root is applied
		 * by this class.
		 * NOTE(review): presumably the caller applies the square root to obtain
		 * the actual L2 norm - confirm against the vector operations.
		 */
		template< typename Real, typename Index >
		class tnlParallelReductionL2Norm : public tnlParallelReductionSum< Real, Index >
		{
		   public:

		   typedef Real RealType;
		   typedef Index IndexType;
		   typedef Real ResultType;
		   typedef tnlParallelReductionSum< Real, Index > LaterReductionOperation;

		   /****
		    * Returns @current increased by the square of data1[ idx ].
		    * @data2 is unused.
		    */
		   ResultType reduceOnHost( const IndexType idx,
		                            const ResultType& current,
		                            const RealType* data1,
		                            const RealType* data2 ) const
		   {
		      const RealType& element = data1[ idx ];
		      return current + element * element;
		   }

		   /****
		    * Starting value of the accumulation: zero converted to ResultType.
		    */
		   __cuda_callable__ ResultType initialValue() const
		   {
		      return ( ResultType ) 0;
		   }

		   /****
		    * Adds the square of data1[ index ] to @result in place.
		    * @data2 is unused.
		    */
		   __cuda_callable__ void cudaFirstReduction( ResultType& result,
		                                              const IndexType index,
		                                              const RealType* data1,
		                                              const RealType* data2 ) const
		   {
		      const RealType& element = data1[ index ];
		      result += element * element;
		   }
		};


		template< typename Real, typename Index >
		class tnlParallelReductionLpNorm : public tnlParallelReductionSum< Real, Index >
		@@ -801,6 +830,37 @@ class tnlParallelReductionDiffAbsMax : public tnlParallelReductionMax< Real, Ind
		}
		};

		/****
		 * Reduction operation that accumulates the squares of the element-wise
		 * differences data2[ i ] - data1[ i ] of the two input arrays.
		 *
		 * Only the sum of squared differences is produced here; no square root
		 * is applied by this class.
		 * NOTE(review): presumably the caller applies the square root to obtain
		 * the actual L2 norm of the difference - confirm against the vector
		 * operations.
		 */
		template< typename Real, typename Index >
		class tnlParallelReductionDiffL2Norm : public tnlParallelReductionSum< Real, Index >
		{
		   public:

		   typedef Real RealType;
		   typedef Index IndexType;
		   typedef Real ResultType;
		   typedef tnlParallelReductionSum< Real, Index > LaterReductionOperation;

		   /****
		    * Returns @current increased by the square of
		    * data2[ idx ] - data1[ idx ].
		    */
		   ResultType reduceOnHost( const IndexType idx,
		                            const ResultType& current,
		                            const RealType* data1,
		                            const RealType* data2 ) const
		   {
		      // The difference is stored as RealType before it is squared.
		      const RealType delta( data2[ idx ] - data1[ idx ] );
		      return current + delta * delta;
		   }

		   /****
		    * Starting value of the accumulation: zero converted to ResultType.
		    */
		   __cuda_callable__ ResultType initialValue() const
		   {
		      return ( ResultType ) 0;
		   }

		   /****
		    * Adds the square of data2[ index ] - data1[ index ] to @result
		    * in place.
		    */
		   __cuda_callable__ void cudaFirstReduction( ResultType& result,
		                                              const IndexType index,
		                                              const RealType* data1,
		                                              const RealType* data2 ) const
		   {
		      // The difference is stored as RealType before it is squared.
		      const RealType delta( data2[ index ] - data1[ index ] );
		      result += delta * delta;
		   }
		};

		template< typename Real, typename Index >
		class tnlParallelReductionDiffLpNorm : public tnlParallelReductionSum< Real, Index >
		{

src/core/vectors/tnlVectorOperations.h

+34 −3

Original line number	Diff line number	Diff line
		@@ -54,6 +54,12 @@ class tnlVectorOperations< tnlHost >
		/****
		 * Declaration only - the definition is not visible here.
		 * Presumably returns the smallest absolute value among the elements
		 * of v; verify against the implementation.
		 */
		template< typename Vector >
		static typename Vector::RealType getVectorAbsMin( const Vector& v );

		/****
		 * Declaration only - the definition is not visible here.
		 * Presumably returns the L1 norm of v (sum of absolute values);
		 * verify against the implementation.
		 */
		template< typename Vector >
		static typename Vector::RealType getVectorL1Norm( const Vector& v );

		/****
		 * Declaration only - the definition is not visible here.
		 * Presumably returns the L2 (Euclidean) norm of v.
		 * NOTE(review): confirm in the implementation that the square root
		 * of the sum of squares is taken.
		 */
		template< typename Vector >
		static typename Vector::RealType getVectorL2Norm( const Vector& v );

		/****
		 * Declaration only - the definition is not visible here.
		 * Presumably returns the Lp norm of v with p as the exponent;
		 * verify against the implementation.
		 */
		template< typename Vector >
		static typename Vector::RealType getVectorLpNorm( const Vector& v,
		const typename Vector::RealType& p );
		@@ -77,6 +83,14 @@ class tnlVectorOperations< tnlHost >
		static typename Vector1::RealType getVectorDifferenceAbsMin( const Vector1& v1,
		const Vector2& v2 );

		/****
		 * Declaration only - the definition is not visible here.
		 * Presumably returns the L1 norm of the element-wise difference of
		 * v1 and v2; verify against the implementation. The result has the
		 * real type of the first vector.
		 */
		template< typename Vector1, typename Vector2 >
		static typename Vector1::RealType getVectorDifferenceL1Norm( const Vector1& v1,
		const Vector2& v2 );

		/****
		 * Declaration only - the definition is not visible here.
		 * Presumably returns the L2 norm of the element-wise difference of
		 * v1 and v2. The result has the real type of the first vector.
		 * NOTE(review): confirm in the implementation that the square root
		 * of the sum of squared differences is taken.
		 */
		template< typename Vector1, typename Vector2 >
		static typename Vector1::RealType getVectorDifferenceL2Norm( const Vector1& v1,
		const Vector2& v2 );

		template< typename Vector1, typename Vector2 >
		static typename Vector1::RealType getVectorDifferenceLpNorm( const Vector1& v1,
		const Vector2& v2,
		@@ -85,6 +99,8 @@ class tnlVectorOperations< tnlHost >
		/****
		 * Declaration only - the definition is not visible here.
		 * Presumably returns the sum of the element-wise differences of v1
		 * and v2; verify against the implementation. The result has the real
		 * type of the first vector.
		 */
		template< typename Vector1, typename Vector2 >
		static typename Vector1::RealType getVectorDifferenceSum( const Vector1& v1,
		const Vector2& v2 );


		/****
		 * Declaration only - the definition is not visible here.
		 * v is taken by non-const reference and nothing is returned, so the
		 * vector is presumably scaled in place by alpha; verify against the
		 * implementation.
		 */
		template< typename Vector >
		static void vectorScalarMultiplication( Vector& v,
		const typename Vector::RealType& alpha );
		@@ -147,6 +163,12 @@ class tnlVectorOperations< tnlCuda >
		/****
		 * Declaration only - the definition is not visible here.
		 * Presumably returns the smallest absolute value among the elements
		 * of v; verify against the implementation.
		 */
		template< typename Vector >
		static typename Vector::RealType getVectorAbsMin( const Vector& v );

		/****
		 * Declaration only - the definition is not visible here.
		 * Presumably returns the L1 norm of v (sum of absolute values);
		 * verify against the implementation.
		 */
		template< typename Vector >
		static typename Vector::RealType getVectorL1Norm( const Vector& v );

		/****
		 * Declaration only - the definition is not visible here.
		 * Presumably returns the L2 (Euclidean) norm of v.
		 * NOTE(review): confirm in the implementation that the square root
		 * of the sum of squares is taken.
		 */
		template< typename Vector >
		static typename Vector::RealType getVectorL2Norm( const Vector& v );

		/****
		 * Declaration only - the definition is not visible here.
		 * Presumably returns the Lp norm of v with p as the exponent;
		 * verify against the implementation.
		 */
		template< typename Vector >
		static typename Vector::RealType getVectorLpNorm( const Vector& v,
		const typename Vector::RealType& p );
		@@ -170,6 +192,14 @@ class tnlVectorOperations< tnlCuda >
		static typename Vector1::RealType getVectorDifferenceAbsMin( const Vector1& v1,
		const Vector2& v2 );

		/****
		 * Declaration only - the definition is not visible here.
		 * Presumably returns the L1 norm of the element-wise difference of
		 * v1 and v2; verify against the implementation. The result has the
		 * real type of the first vector.
		 */
		template< typename Vector1, typename Vector2 >
		static typename Vector1::RealType getVectorDifferenceL1Norm( const Vector1& v1,
		const Vector2& v2 );

		/****
		 * Declaration only - the definition is not visible here.
		 * Presumably returns the L2 norm of the element-wise difference of
		 * v1 and v2. The result has the real type of the first vector.
		 * NOTE(review): confirm in the implementation that the square root
		 * of the sum of squared differences is taken.
		 */
		template< typename Vector1, typename Vector2 >
		static typename Vector1::RealType getVectorDifferenceL2Norm( const Vector1& v1,
		const Vector2& v2 );

		template< typename Vector1, typename Vector2 >
		static typename Vector1::RealType getVectorDifferenceLpNorm( const Vector1& v1,
		const Vector2& v2,
		@@ -178,6 +208,7 @@ class tnlVectorOperations< tnlCuda >
		/****
		 * Declaration only - the definition is not visible here.
		 * Presumably returns the sum of the element-wise differences of v1
		 * and v2; verify against the implementation. The result has the real
		 * type of the first vector.
		 */
		template< typename Vector1, typename Vector2 >
		static typename Vector1::RealType getVectorDifferenceSum( const Vector1& v1,
		const Vector2& v2 );

		/****
		 * Declaration only - the definition is not visible here.
		 * v is taken by non-const reference and nothing is returned, so the
		 * vector is presumably scaled in place by alpha; verify against the
		 * implementation.
		 */
		template< typename Vector >
		static void vectorScalarMultiplication( Vector& v,
		const typename Vector::RealType& alpha );