Commit 5e526c7f authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Merge branch 'master' into mhfem

* master:
  Optimizing L1 and L2 norms in CUDA.
parents 03905634 59b3959e
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@ IF( BUILD_CUDA )
        ${CURRENT_DIR}/cuda-reduction-abs-max_impl.cu
        ${CURRENT_DIR}/cuda-reduction-and_impl.cu
        ${CURRENT_DIR}/cuda-reduction-or_impl.cu
        ${CURRENT_DIR}/cuda-reduction-l2-norm_impl.cu
        ${CURRENT_DIR}/cuda-reduction-lp-norm_impl.cu
        ${CURRENT_DIR}/cuda-reduction-equalities_impl.cu
        ${CURRENT_DIR}/cuda-reduction-inequalities_impl.cu
@@ -30,6 +31,7 @@ IF( BUILD_CUDA )
        ${CURRENT_DIR}/cuda-reduction-diff-abs-sum_impl.cu
        ${CURRENT_DIR}/cuda-reduction-diff-abs-min_impl.cu
        ${CURRENT_DIR}/cuda-reduction-diff-abs-max_impl.cu
        ${CURRENT_DIR}/cuda-reduction-diff-l2-norm_impl.cu        
        ${CURRENT_DIR}/cuda-reduction-diff-lp-norm_impl.cu        
        ${CURRENT_DIR}/cuda-prefix-sum_impl.cu
        PARENT_SCOPE )        
+87 −0
Original line number Diff line number Diff line
/***************************************************************************
                          cuda-reduction-diff-l2-norm_impl.cu  -  description
                             -------------------
    begin                : Jan 19, 2014
    copyright            : (C) 2014 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/
 
#include <core/cuda/reduction-operations.h>
#include <core/cuda/cuda-reduction.h>
 
#ifdef TEMPLATE_EXPLICIT_INSTANTIATION

/****
 * Diff L2 Norm
 *
 * Explicit instantiations of reductionOnCudaDevice for the
 * tnlParallelReductionDiffL2Norm operation. Nothing in this block is
 * compiled unless TEMPLATE_EXPLICIT_INSTANTIATION is defined.
 *
 * Instantiated ( Real, Index ) combinations:
 *  - Index = int:      float, double, and long double when
 *                      INSTANTIATE_LONG_DOUBLE is defined
 *  - Index = long int: char, int, float, double, and long double when
 *                      INSTANTIATE_LONG_DOUBLE is defined; this whole group
 *                      requires INSTANTIATE_LONG_INT
 *
 * NOTE(review): char and int Real types are instantiated only together with
 * the long int index, never with the int index -- confirm this is intended.
 */

// Index type int.
template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< float, int > >
                                   ( const tnlParallelReductionDiffL2Norm< float, int >& operation,
                                     const typename tnlParallelReductionDiffL2Norm< float, int > :: IndexType size,
                                     const typename tnlParallelReductionDiffL2Norm< float, int > :: RealType* deviceInput1,
                                     const typename tnlParallelReductionDiffL2Norm< float, int > :: RealType* deviceInput2,
                                     typename tnlParallelReductionDiffL2Norm< float, int> :: ResultType& result );

template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< double, int > >
                                   ( const tnlParallelReductionDiffL2Norm< double, int>& operation,
                                     const typename tnlParallelReductionDiffL2Norm< double, int > :: IndexType size,
                                     const typename tnlParallelReductionDiffL2Norm< double, int > :: RealType* deviceInput1,
                                     const typename tnlParallelReductionDiffL2Norm< double, int > :: RealType* deviceInput2,
                                     typename tnlParallelReductionDiffL2Norm< double, int> :: ResultType& result );

#ifdef INSTANTIATE_LONG_DOUBLE
template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< long double, int > >
                                   ( const tnlParallelReductionDiffL2Norm< long double, int>& operation,
                                     const typename tnlParallelReductionDiffL2Norm< long double, int > :: IndexType size,
                                     const typename tnlParallelReductionDiffL2Norm< long double, int > :: RealType* deviceInput1,
                                     const typename tnlParallelReductionDiffL2Norm< long double, int > :: RealType* deviceInput2,
                                     typename tnlParallelReductionDiffL2Norm< long double, int> :: ResultType& result );
#endif // INSTANTIATE_LONG_DOUBLE

// Index type long int (optional, guarded by INSTANTIATE_LONG_INT).
#ifdef INSTANTIATE_LONG_INT
template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< char, long int > >
                                   ( const tnlParallelReductionDiffL2Norm< char, long int >& operation,
                                     const typename tnlParallelReductionDiffL2Norm< char, long int > :: IndexType size,
                                     const typename tnlParallelReductionDiffL2Norm< char, long int > :: RealType* deviceInput1,
                                     const typename tnlParallelReductionDiffL2Norm< char, long int > :: RealType* deviceInput2,
                                     typename tnlParallelReductionDiffL2Norm< char, long int > :: ResultType& result );

template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< int, long int > >
                                   ( const tnlParallelReductionDiffL2Norm< int, long int >& operation,
                                     const typename tnlParallelReductionDiffL2Norm< int, long int > :: IndexType size,
                                     const typename tnlParallelReductionDiffL2Norm< int, long int > :: RealType* deviceInput1,
                                     const typename tnlParallelReductionDiffL2Norm< int, long int > :: RealType* deviceInput2,
                                     typename tnlParallelReductionDiffL2Norm< int, long int > :: ResultType& result );

template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< float, long int > >
                                   ( const tnlParallelReductionDiffL2Norm< float, long int >& operation,
                                     const typename tnlParallelReductionDiffL2Norm< float, long int > :: IndexType size,
                                     const typename tnlParallelReductionDiffL2Norm< float, long int > :: RealType* deviceInput1,
                                     const typename tnlParallelReductionDiffL2Norm< float, long int > :: RealType* deviceInput2,
                                     typename tnlParallelReductionDiffL2Norm< float, long int> :: ResultType& result );

template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< double, long int > >
                                   ( const tnlParallelReductionDiffL2Norm< double, long int>& operation,
                                     const typename tnlParallelReductionDiffL2Norm< double, long int > :: IndexType size,
                                     const typename tnlParallelReductionDiffL2Norm< double, long int > :: RealType* deviceInput1,
                                     const typename tnlParallelReductionDiffL2Norm< double, long int > :: RealType* deviceInput2,
                                     typename tnlParallelReductionDiffL2Norm< double, long int> :: ResultType& result );

#ifdef INSTANTIATE_LONG_DOUBLE
template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< long double, long int > >
                                   ( const tnlParallelReductionDiffL2Norm< long double, long int>& operation,
                                     const typename tnlParallelReductionDiffL2Norm< long double, long int > :: IndexType size,
                                     const typename tnlParallelReductionDiffL2Norm< long double, long int > :: RealType* deviceInput1,
                                     const typename tnlParallelReductionDiffL2Norm< long double, long int > :: RealType* deviceInput2,
                                     typename tnlParallelReductionDiffL2Norm< long double, long int> :: ResultType& result );
#endif // INSTANTIATE_LONG_DOUBLE
#endif // INSTANTIATE_LONG_INT
#endif // TEMPLATE_EXPLICIT_INSTANTIATION
+80 −0
Original line number Diff line number Diff line
/***************************************************************************
                          cuda-reduction-l2-norm_impl.cu  -  description
                             -------------------
    begin                : Jan 19, 2014
    copyright            : (C) 2014 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/
 
#include <core/cuda/reduction-operations.h>
#include <core/cuda/cuda-reduction.h>
 
#ifdef TEMPLATE_EXPLICIT_INSTANTIATION

/****
 * L2 Norm
 *
 * Explicit instantiations of reductionOnCudaDevice for the
 * tnlParallelReductionL2Norm operation. Nothing in this block is compiled
 * unless TEMPLATE_EXPLICIT_INSTANTIATION is defined.
 *
 * Instantiated ( Real, Index ) combinations:
 *  - Index = int:      float, double, and long double when
 *                      INSTANTIATE_LONG_DOUBLE is defined
 *  - Index = long int: int, float, double, and long double when
 *                      INSTANTIATE_LONG_DOUBLE is defined; this whole group
 *                      requires INSTANTIATE_LONG_INT
 *
 * NOTE(review): the int Real type is instantiated only together with the
 * long int index, never with the int index -- confirm this is intended.
 */

// Index type int.
template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< float, int > >
                                   ( const tnlParallelReductionL2Norm< float, int >& operation,
                                     const typename tnlParallelReductionL2Norm< float, int > :: IndexType size,
                                     const typename tnlParallelReductionL2Norm< float, int > :: RealType* deviceInput1,
                                     const typename tnlParallelReductionL2Norm< float, int > :: RealType* deviceInput2,
                                     typename tnlParallelReductionL2Norm< float, int> :: ResultType& result );

template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< double, int > >
                                   ( const tnlParallelReductionL2Norm< double, int>& operation,
                                     const typename tnlParallelReductionL2Norm< double, int > :: IndexType size,
                                     const typename tnlParallelReductionL2Norm< double, int > :: RealType* deviceInput1,
                                     const typename tnlParallelReductionL2Norm< double, int > :: RealType* deviceInput2,
                                     typename tnlParallelReductionL2Norm< double, int> :: ResultType& result );

#ifdef INSTANTIATE_LONG_DOUBLE
template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< long double, int > >
                                   ( const tnlParallelReductionL2Norm< long double, int>& operation,
                                     const typename tnlParallelReductionL2Norm< long double, int > :: IndexType size,
                                     const typename tnlParallelReductionL2Norm< long double, int > :: RealType* deviceInput1,
                                     const typename tnlParallelReductionL2Norm< long double, int > :: RealType* deviceInput2,
                                     typename tnlParallelReductionL2Norm< long double, int> :: ResultType& result );
#endif // INSTANTIATE_LONG_DOUBLE

// Index type long int (optional, guarded by INSTANTIATE_LONG_INT).
#ifdef INSTANTIATE_LONG_INT
template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< int, long int > >
                                   ( const tnlParallelReductionL2Norm< int, long int >& operation,
                                     const typename tnlParallelReductionL2Norm< int, long int > :: IndexType size,
                                     const typename tnlParallelReductionL2Norm< int, long int > :: RealType* deviceInput1,
                                     const typename tnlParallelReductionL2Norm< int, long int > :: RealType* deviceInput2,
                                     typename tnlParallelReductionL2Norm< int, long int> :: ResultType& result );

template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< float, long int > >
                                   ( const tnlParallelReductionL2Norm< float, long int >& operation,
                                     const typename tnlParallelReductionL2Norm< float, long int > :: IndexType size,
                                     const typename tnlParallelReductionL2Norm< float, long int > :: RealType* deviceInput1,
                                     const typename tnlParallelReductionL2Norm< float, long int > :: RealType* deviceInput2,
                                     typename tnlParallelReductionL2Norm< float, long int> :: ResultType& result );

template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< double, long int > >
                                   ( const tnlParallelReductionL2Norm< double, long int>& operation,
                                     const typename tnlParallelReductionL2Norm< double, long int > :: IndexType size,
                                     const typename tnlParallelReductionL2Norm< double, long int > :: RealType* deviceInput1,
                                     const typename tnlParallelReductionL2Norm< double, long int > :: RealType* deviceInput2,
                                     typename tnlParallelReductionL2Norm< double, long int> :: ResultType& result );

#ifdef INSTANTIATE_LONG_DOUBLE
template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< long double, long int > >
                                   ( const tnlParallelReductionL2Norm< long double, long int>& operation,
                                     const typename tnlParallelReductionL2Norm< long double, long int > :: IndexType size,
                                     const typename tnlParallelReductionL2Norm< long double, long int > :: RealType* deviceInput1,
                                     const typename tnlParallelReductionL2Norm< long double, long int > :: RealType* deviceInput2,
                                     typename tnlParallelReductionL2Norm< long double, long int> :: ResultType& result );
#endif // INSTANTIATE_LONG_DOUBLE
#endif // INSTANTIATE_LONG_INT
#endif // TEMPLATE_EXPLICIT_INSTANTIATION
 No newline at end of file
+60 −0
Original line number Diff line number Diff line
@@ -501,6 +501,35 @@ class tnlParallelReductionAbsMax : public tnlParallelReductionMax< Real, Index >
   }   
};

/****
 * Reduction operation that accumulates the squares of the elements of the
 * first input array. The second input array is accepted but never read.
 * No square root is taken anywhere in this class.
 * NOTE(review): the caller presumably applies the square root to the reduced
 * value to obtain the L2 norm -- confirm at the call site.
 */
template< typename Real, typename Index >
class tnlParallelReductionL2Norm : public tnlParallelReductionSum< Real, Index >
{
   public:

   typedef Real RealType;
   typedef Index IndexType;
   typedef Real ResultType;

   // NOTE(review): presumably the operation used to combine partial results
   // after the first pass -- confirm in the reduction driver.
   typedef tnlParallelReductionSum< Real, Index > LaterReductionOperation;

   /****
    * Returns current increased by the square of data1[ idx ].
    * data2 is not used.
    */
   ResultType reduceOnHost( const IndexType idx,
                            const ResultType& current,
                            const RealType* data1,
                            const RealType* data2 ) const
   {
      const RealType& element = data1[ idx ];
      return current + element * element;
   }

   /****
    * Starting value of the accumulation: zero converted to ResultType.
    */
   __cuda_callable__ ResultType initialValue() const
   {
      return ResultType( 0 );
   }

   /****
    * Adds the square of data1[ index ] to result in place.
    * data2 is not used.
    */
   __cuda_callable__ void cudaFirstReduction( ResultType& result,
                                              const IndexType index,
                                              const RealType* data1,
                                              const RealType* data2 ) const
   {
      const RealType& element = data1[ index ];
      result += element * element;
   }
};


template< typename Real, typename Index >
class tnlParallelReductionLpNorm : public tnlParallelReductionSum< Real, Index >
@@ -801,6 +830,37 @@ class tnlParallelReductionDiffAbsMax : public tnlParallelReductionMax< Real, Ind
   }
};

/****
 * Reduction operation that accumulates the squares of the element-wise
 * differences data2[ i ] - data1[ i ] of the two input arrays.
 * No square root is taken anywhere in this class.
 * NOTE(review): the caller presumably applies the square root to the reduced
 * value to obtain the L2 norm of the difference -- confirm at the call site.
 */
template< typename Real, typename Index >
class tnlParallelReductionDiffL2Norm : public tnlParallelReductionSum< Real, Index >
{
   public:

   typedef Real RealType;
   typedef Index IndexType;
   typedef Real ResultType;

   // NOTE(review): presumably the operation used to combine partial results
   // after the first pass -- confirm in the reduction driver.
   typedef tnlParallelReductionSum< Real, Index > LaterReductionOperation;

   /****
    * Returns current increased by the square of ( data2[ idx ] - data1[ idx ] ).
    * The difference is stored as RealType before it is squared.
    */
   ResultType reduceOnHost( const IndexType idx,
                            const ResultType& current,
                            const RealType* data1,
                            const RealType* data2 ) const
   {
      const RealType difference( data2[ idx ] - data1[ idx ] );
      return current + difference * difference;
   }

   /****
    * Starting value of the accumulation: zero converted to ResultType.
    */
   __cuda_callable__ ResultType initialValue() const
   {
      return ResultType( 0 );
   }

   /****
    * Adds the square of ( data2[ index ] - data1[ index ] ) to result in place.
    * The difference is stored as RealType before it is squared.
    */
   __cuda_callable__ void cudaFirstReduction( ResultType& result,
                                              const IndexType index,
                                              const RealType* data1,
                                              const RealType* data2 ) const
   {
      const RealType difference( data2[ index ] - data1[ index ] );
      result += difference * difference;
   }
};

template< typename Real, typename Index >
class tnlParallelReductionDiffLpNorm : public tnlParallelReductionSum< Real, Index >
{
+34 −3
Original line number Diff line number Diff line
@@ -54,6 +54,12 @@ class tnlVectorOperations< tnlHost >
   template< typename Vector >
   static typename Vector::RealType getVectorAbsMin( const Vector& v );

   /****
    * Takes the vector v and returns a value of type Vector::RealType.
    * Declaration only; the definition is not visible here.
    * NOTE(review): presumably the L1 norm of v (sum of absolute values of its
    * elements), computed on the host -- confirm against the implementation.
    */
   template< typename Vector >
   static typename Vector::RealType getVectorL1Norm( const Vector& v );
   
   /****
    * Takes the vector v and returns a value of type Vector::RealType.
    * Declaration only; the definition is not visible here.
    * NOTE(review): presumably the L2 (Euclidean) norm of v, computed on the
    * host -- confirm against the implementation.
    */
   template< typename Vector >
   static typename Vector::RealType getVectorL2Norm( const Vector& v );
   
   template< typename Vector >
   static typename Vector::RealType getVectorLpNorm( const Vector& v,
                                                     const typename Vector::RealType& p );
@@ -77,6 +83,14 @@ class tnlVectorOperations< tnlHost >
   static typename Vector1::RealType getVectorDifferenceAbsMin( const Vector1& v1,
                                                                  const Vector2& v2 );

   /****
    * Takes two vectors, v1 and v2, and returns a value of type Vector1::RealType.
    * Declaration only; the definition is not visible here.
    * NOTE(review): presumably the L1 norm of the element-wise difference of
    * v1 and v2, computed on the host -- confirm against the implementation.
    */
   template< typename Vector1, typename Vector2 >
   static typename Vector1::RealType getVectorDifferenceL1Norm( const Vector1& v1,
                                                           const Vector2& v2 );

   /****
    * Takes two vectors, v1 and v2, and returns a value of type Vector1::RealType.
    * Declaration only; the definition is not visible here.
    * NOTE(review): presumably the L2 (Euclidean) norm of the element-wise
    * difference of v1 and v2, computed on the host -- confirm against the
    * implementation.
    */
   template< typename Vector1, typename Vector2 >
   static typename Vector1::RealType getVectorDifferenceL2Norm( const Vector1& v1,
                                                           const Vector2& v2 );
   
   template< typename Vector1, typename Vector2 >
   static typename Vector1::RealType getVectorDifferenceLpNorm( const Vector1& v1,
                                                           const Vector2& v2,
@@ -85,6 +99,8 @@ class tnlVectorOperations< tnlHost >
   template< typename Vector1, typename Vector2 >
   static typename Vector1::RealType getVectorDifferenceSum( const Vector1& v1,
                                                               const Vector2& v2 );
   
   
   template< typename Vector >
   static void vectorScalarMultiplication( Vector& v,
                                           const typename Vector::RealType& alpha );
@@ -147,6 +163,12 @@ class tnlVectorOperations< tnlCuda >
   template< typename Vector >
   static typename Vector::RealType getVectorAbsMin( const Vector& v );
   
   /****
    * Takes the vector v and returns a value of type Vector::RealType.
    * Declaration only; the definition is not visible here.
    * NOTE(review): presumably the L1 norm of v (sum of absolute values of its
    * elements), computed on the CUDA device -- confirm against the
    * implementation.
    */
   template< typename Vector >
   static typename Vector::RealType getVectorL1Norm( const Vector& v );
   
   /****
    * Takes the vector v and returns a value of type Vector::RealType.
    * Declaration only; the definition is not visible here.
    * NOTE(review): presumably the L2 (Euclidean) norm of v, computed on the
    * CUDA device -- confirm against the implementation.
    */
   template< typename Vector >
   static typename Vector::RealType getVectorL2Norm( const Vector& v );
   
   template< typename Vector >
   static typename Vector::RealType getVectorLpNorm( const Vector& v,
                                                     const typename Vector::RealType& p );
@@ -170,6 +192,14 @@ class tnlVectorOperations< tnlCuda >
   static typename Vector1::RealType getVectorDifferenceAbsMin( const Vector1& v1,
                                                                const Vector2& v2 );
  
   /****
    * Takes two vectors, v1 and v2, and returns a value of type Vector1::RealType.
    * Declaration only; the definition is not visible here.
    * NOTE(review): presumably the L1 norm of the element-wise difference of
    * v1 and v2, computed on the CUDA device -- confirm against the
    * implementation.
    */
   template< typename Vector1, typename Vector2 >
   static typename Vector1::RealType getVectorDifferenceL1Norm( const Vector1& v1,
                                                                const Vector2& v2 );

   /****
    * Takes two vectors, v1 and v2, and returns a value of type Vector1::RealType.
    * Declaration only; the definition is not visible here.
    * NOTE(review): presumably the L2 (Euclidean) norm of the element-wise
    * difference of v1 and v2, computed on the CUDA device -- confirm against
    * the implementation.
    */
   template< typename Vector1, typename Vector2 >
   static typename Vector1::RealType getVectorDifferenceL2Norm( const Vector1& v1,
                                                                const Vector2& v2 );
  
   template< typename Vector1, typename Vector2 >
   static typename Vector1::RealType getVectorDifferenceLpNorm( const Vector1& v1,
                                                           const Vector2& v2,
@@ -178,6 +208,7 @@ class tnlVectorOperations< tnlCuda >
   template< typename Vector1, typename Vector2 >
   static typename Vector1::RealType getVectorDifferenceSum( const Vector1& v1,
                                                               const Vector2& v2 );
   
   template< typename Vector >
   static void vectorScalarMultiplication( Vector& v,
                                           const typename Vector::RealType& alpha );
Loading