Commit 9d816749 authored by Jakub Klinkovský
Browse files

commonReductionOnDevice will also be useful for OpenMP parallelization on the host

parent 7110b53a
Loading
Loading
Loading
Loading
+38 −57
Original line number Diff line number Diff line
@@ -218,21 +218,17 @@ class tnlParallelReductionSum
      result += data1[ index ];
   }
 
#ifdef HAVE_CUDA

   // Merges one partial result into another: adds `data` to `result` in place.
   // NOTE(review): the two signature lines below look like the removed
   // (`__device__`) and added (`__cuda_callable__`) sides of this diff hunk;
   // only one of them can exist in the real source file -- confirm which.
   __device__ void commonReductionOnDevice( ResultType& result,
   __cuda_callable__ void commonReductionOnDevice( ResultType& result,
                                                   const ResultType& data )
   {
      // `result` is updated through the reference; nothing is returned.
      result += data;
   };
 
   // Overload of the in-place addition for volatile-qualified arguments:
   // adds `data` to `result`, both accessed through volatile references.
   // NOTE(review): the two signature lines below look like the removed
   // (`__device__`) and added (`__cuda_callable__`) sides of this diff hunk;
   // only one of them can exist in the real source file -- confirm which.
   __device__ void commonReductionOnDevice( volatile ResultType& result,
   __cuda_callable__ void commonReductionOnDevice( volatile ResultType& result,
                                                   volatile const ResultType& data )
   {
      // `result` is updated through the reference; nothing is returned.
      result += data;
   };

#endif
};

template< typename Real, typename Index >
@@ -263,21 +259,17 @@ class tnlParallelReductionMin
      result = tnlCudaMin( result, data1[ index ] );
   }
 
#ifdef HAVE_CUDA
   // Merges one partial result into another: overwrites `result` with
   // tnlCudaMin( result, data ). tnlCudaMin is defined outside this view --
   // presumably it yields the smaller of its two arguments; verify.
   // NOTE(review): the two signature lines below look like the removed
   // (`__device__`) and added (`__cuda_callable__`) sides of this diff hunk;
   // only one of them can exist in the real source file -- confirm which.
   __device__ void commonReductionOnDevice( ResultType& result,
   __cuda_callable__ void commonReductionOnDevice( ResultType& result,
                                                   const ResultType& data )
   {
      // `result` is updated through the reference; nothing is returned.
      result = tnlCudaMin( result, data );
   };
 
   // Overload for volatile-qualified arguments: overwrites `result` with
   // tnlCudaMin( result, data ). tnlCudaMin is defined outside this view --
   // presumably it yields the smaller of its two arguments; verify.
   // NOTE(review): the two signature lines below look like the removed
   // (`__device__`) and added (`__cuda_callable__`) sides of this diff hunk;
   // only one of them can exist in the real source file -- confirm which.
   __device__ void commonReductionOnDevice( volatile ResultType& result,
   __cuda_callable__ void commonReductionOnDevice( volatile ResultType& result,
                                                   volatile const ResultType& data )
   {
      // `result` is updated through the reference; nothing is returned.
      result = tnlCudaMin( result, data );
   };
 

#endif
};

template< typename Real, typename Index >
@@ -308,19 +300,17 @@ class tnlParallelReductionMax
      result = tnlCudaMax( result, data1[ index ] );
   }
 
#ifdef HAVE_CUDA
   // Merges one partial result into another: overwrites `result` with
   // tnlCudaMax( result, data ). tnlCudaMax is defined outside this view --
   // presumably it yields the larger of its two arguments; verify.
   // NOTE(review): the two signature lines below look like the removed
   // (`__device__`) and added (`__cuda_callable__`) sides of this diff hunk;
   // only one of them can exist in the real source file -- confirm which.
   __device__ void commonReductionOnDevice( ResultType& result,
   __cuda_callable__ void commonReductionOnDevice( ResultType& result,
                                                   const ResultType& data )
   {
      // `result` is updated through the reference; nothing is returned.
      result = tnlCudaMax( result, data );
   };

   // Overload for volatile-qualified arguments: overwrites `result` with
   // tnlCudaMax( result, data ). tnlCudaMax is defined outside this view --
   // presumably it yields the larger of its two arguments; verify.
   // NOTE(review): the two signature lines below look like the removed
   // (`__device__`) and added (`__cuda_callable__`) sides of this diff hunk;
   // only one of them can exist in the real source file -- confirm which.
   __device__ void commonReductionOnDevice( volatile ResultType& result,
   __cuda_callable__ void commonReductionOnDevice( volatile ResultType& result,
                                                   volatile const ResultType& data )
   {
      // `result` is updated through the reference; nothing is returned.
      result = tnlCudaMax( result, data );
   };
#endif
};

template< typename Real, typename Index >
@@ -351,22 +341,17 @@ class tnlParallelReductionLogicalAnd
      result = result && data1[ index ];
   }
 
 
#ifdef HAVE_CUDA
   // Merges one partial result into another: overwrites `result` with the
   // logical AND of `result` and `data`.
   // NOTE(review): the two signature lines below look like the removed
   // (`__device__`) and added (`__cuda_callable__`) sides of this diff hunk;
   // only one of them can exist in the real source file -- confirm which.
   __device__ void commonReductionOnDevice( ResultType& result,
   __cuda_callable__ void commonReductionOnDevice( ResultType& result,
                                                   const ResultType& data )
   {
      // `&&` short-circuits: `data` is not evaluated when `result` is already false.
      result = result && data;
   };
 
   // Overload for volatile-qualified arguments: overwrites `result` with the
   // logical AND of `result` and `data`.
   // NOTE(review): the two signature lines below look like the removed
   // (`__device__`) and added (`__cuda_callable__`) sides of this diff hunk;
   // only one of them can exist in the real source file -- confirm which.
   __device__ void commonReductionOnDevice( volatile ResultType& result,
   __cuda_callable__ void commonReductionOnDevice( volatile ResultType& result,
                                                   volatile const ResultType& data )
   {
      // `&&` short-circuits: `data` is not read when `result` is already false.
      result = result && data;
   };
 

#endif
};


@@ -398,20 +383,17 @@ class tnlParallelReductionLogicalOr
      result = result || data1[ index ];
   }


#ifdef HAVE_CUDA
   // Merges one partial result into another: overwrites `result` with the
   // logical OR of `result` and `data`.
   // NOTE(review): the two signature lines below look like the removed
   // (`__device__`) and added (`__cuda_callable__`) sides of this diff hunk;
   // only one of them can exist in the real source file -- confirm which.
   __device__ void commonReductionOnDevice( ResultType& result,
   __cuda_callable__ void commonReductionOnDevice( ResultType& result,
                                                   const ResultType& data )
   {
      // `||` short-circuits: `data` is not evaluated when `result` is already true.
      result = result || data;
   };
 
   // Overload for volatile-qualified arguments: overwrites `result` with the
   // logical OR of `result` and `data`.
   // NOTE(review): the two signature lines below look like the removed
   // (`__device__`) and added (`__cuda_callable__`) sides of this diff hunk;
   // only one of them can exist in the real source file -- confirm which.
   __device__ void commonReductionOnDevice( volatile ResultType& result,
   __cuda_callable__ void commonReductionOnDevice( volatile ResultType& result,
                                                   volatile const ResultType& data )
   {
      // `||` short-circuits: `data` is not read when `result` is already true.
      result = result || data;
   };
#endif
};

template< typename Real, typename Index >
@@ -909,4 +891,3 @@ class tnlParallelReductionDiffLpNorm : public tnlParallelReductionSum< Real, Ind
} // namespace Containers
} // namespace TNL