Loading src/core/cuda/reduction-operations.h 0 → 100644 +226 −0 Original line number Diff line number Diff line /*************************************************************************** reduction-operations.h - description ------------------- begin : Mar 22, 2013 copyright : (C) 2013 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * ***************************************************************************/ #ifndef REDUCTION_OPERATIONS_H_ #define REDUCTION_OPERATIONS_H_ #ifdef HAVE_CUDA #include <cuda.h> #include <core/mfuncs.h> enum tnlTupleOperation { tnlParallelReductionMax, tnlParallelReductionAbsMin, tnlParallelReductionAbsMax, tnlParallelReductionAbsSum, tnlParallelReductionLpNorm, tnlParallelReductionSdot }; /*** * This function returns minimum of two numbers stored on the device. */ template< class T > __device__ T tnlCudaMin( const T& a, const T& b ) { return a < b ? a : b; } __device__ int tnlCudaMin( const int& a, const int& b ) { return min( a, b ); } __device__ float tnlCudaMin( const float& a, const float& b ) { return fminf( a, b ); } __device__ double tnlCudaMin( const double& a, const double& b ) { return fmin( a, b ); } /*** * This function returns maximum of two numbers stored on the device. */ template< class T > __device__ T tnlCudaMax( const T& a, const T& b ) { return a > b ? a : b; } __device__ int tnlCudaMax( const int& a, const int& b ) { return max( a, b ); } __device__ float tnlCudaMax( const float& a, const float& b ) { return fmaxf( a, b ); } __device__ double tnlCudaMax( const double& a, const double& b ) { return fmax( a, b ); } /*** * This function returns absolute value of given number on the device. */ __device__ int tnlCudaAbs( const int& a ) { return abs( a ); } __device__ float tnlCudaAbs( const float& a ) { return fabs( a ); } __device__ double tnlCudaAbs( const double& a ) { return fabs( a ); } template< typename Real, typename Index > class tnlParallelReductionSum { public: typedef Real RealType; typedef Index IndexType; RealType reduceOnHost( const RealType& data1, const RealType& data2 ) const { return data1 + data2; } __device__ RealType initialValueOnDevice( const IndexType idx1, const IndexType idx2, const RealType* data1, const RealType* data2 ) const { return data1[ idx1 ] + data1[ idx2 ]; } __device__ RealType initialValueOnDevice( const IndexType idx1, const RealType* data1, const RealType* data2 ) const { return data1[ idx1 ]; }; __device__ RealType firstReductionOnDevice( const IndexType idx1, const IndexType idx2, const IndexType idx3, const RealType* data1, const RealType* data2, const RealType* data3 ) const { return data1[ idx1 ] + data2[ idx2 ] + data2[ idx3 ]; }; __device__ RealType firstReductionOnDevice( const IndexType idx1, const IndexType idx2, const RealType* data1, const RealType* data2, const RealType* data3 ) const { return data1[ idx1 ] + data2[ idx2 ]; }; __device__ RealType commonReductionOnDevice( const IndexType idx1, const IndexType idx2, const RealType* data ) const { return data[ idx1 ] + data[ idx2 ]; }; }; template< typename Real, typename Index > class tnlParallelReductionMin { public: typedef Real RealType; typedef Index IndexType; RealType reduceOnHost( const RealType& data1, const RealType& data2 ) const { return Min( data1, data2 ); } __device__ RealType initialValueOnDevice( const IndexType idx1, const IndexType idx2, const RealType* data1, const RealType* data2 ) const { return tnlCudaMin( data1[ idx1 ], data1[ idx2 ] ); } __device__ RealType initialValueOnDevice( const IndexType idx1, const RealType* data1, const RealType* data2 ) const { return data1[ idx1 ]; }; __device__ RealType firstReductionOnDevice( const IndexType idx1, const IndexType idx2, const IndexType idx3, const RealType* data1, const RealType* data2, const RealType* data3 ) const { return tnlCudaMin( data1[ idx1 ], tnlCudaMin( data2[ idx2 ], data2[ idx3 ] ) ); }; __device__ RealType firstReductionOnDevice( const IndexType idx1, const IndexType idx2, const RealType* data1, const RealType* data2, const RealType* data3 ) const { return tnlCudaMin( data1[ idx1 ], data2[ idx2 ] ); }; __device__ RealType commonReductionOnDevice( const IndexType idx1, const IndexType idx2, const RealType* data ) const { return tnlCudaMin( data[ idx1 ], data[ idx2 ] ); }; }; #include <implementation/core/cuda/reduction-operations_impl.h> #endif #endif /* REDUCTION_OPERATIONS_H_ */ src/implementation/core/CMakeLists.txt +3 −1 Original line number Diff line number Diff line Loading @@ -21,8 +21,10 @@ SET( headers cuda-long-vector-kernels.h SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/core ) IF( BUILD_CUDA ) set( tnl_implementation_core_CUDA__SOURCES ${tnl_implementation_core_cuda_CUDA__SOURCES} ${CURRENT_DIR}/tnlArray_impl.cu ${CURRENT_DIR}/tnlVector_impl.cu ) ${CURRENT_DIR}/tnlVector_impl.cu PARENT_SCOPE ) ENDIF() set( tnl_implementation_core_SOURCES ${tnl_implementation_core_CUDA__SOURCES} Loading src/implementation/core/cuda-long-vector-kernels.h +137 −203 File changed.Preview size limit exceeded, changes collapsed. Show changes src/implementation/core/cuda/CMakeLists.txt +8 −1 Original line number Diff line number Diff line SET( headers reduction_impl.h ) SET( headers reduction_impl.h reduction-operations_impl.h ) SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/core/cuda ) IF( BUILD_CUDA ) set( tnl_implementation_core_cuda_CUDA__SOURCES ${CURRENT_DIR}/reduction-operations_impl.cu PARENT_SCOPE ) endif() set( tnl_implementation_core_cuda_SOURCES ${CURRENT_DIR}/device-check.cpp PARENT_SCOPE ) Loading src/implementation/core/cuda/reduction-operations_impl.cu 0 → 100644 +16 −0 Original line number Diff line number Diff line /*************************************************************************** reduction-operations.cu - description ------------------- begin : Mar 22, 2013 copyright : (C) 2013 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * ***************************************************************************/ No newline at end of file Loading
src/core/cuda/reduction-operations.h 0 → 100644 +226 −0 Original line number Diff line number Diff line /*************************************************************************** reduction-operations.h - description ------------------- begin : Mar 22, 2013 copyright : (C) 2013 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * ***************************************************************************/ #ifndef REDUCTION_OPERATIONS_H_ #define REDUCTION_OPERATIONS_H_ #ifdef HAVE_CUDA #include <cuda.h> #include <core/mfuncs.h> enum tnlTupleOperation { tnlParallelReductionMax, tnlParallelReductionAbsMin, tnlParallelReductionAbsMax, tnlParallelReductionAbsSum, tnlParallelReductionLpNorm, tnlParallelReductionSdot }; /*** * This function returns minimum of two numbers stored on the device. */ template< class T > __device__ T tnlCudaMin( const T& a, const T& b ) { return a < b ? a : b; } __device__ int tnlCudaMin( const int& a, const int& b ) { return min( a, b ); } __device__ float tnlCudaMin( const float& a, const float& b ) { return fminf( a, b ); } __device__ double tnlCudaMin( const double& a, const double& b ) { return fmin( a, b ); } /*** * This function returns maximum of two numbers stored on the device. */ template< class T > __device__ T tnlCudaMax( const T& a, const T& b ) { return a > b ? a : b; } __device__ int tnlCudaMax( const int& a, const int& b ) { return max( a, b ); } __device__ float tnlCudaMax( const float& a, const float& b ) { return fmaxf( a, b ); } __device__ double tnlCudaMax( const double& a, const double& b ) { return fmax( a, b ); } /*** * This function returns absolute value of given number on the device. */ __device__ int tnlCudaAbs( const int& a ) { return abs( a ); } __device__ float tnlCudaAbs( const float& a ) { return fabs( a ); } __device__ double tnlCudaAbs( const double& a ) { return fabs( a ); } template< typename Real, typename Index > class tnlParallelReductionSum { public: typedef Real RealType; typedef Index IndexType; RealType reduceOnHost( const RealType& data1, const RealType& data2 ) const { return data1 + data2; } __device__ RealType initialValueOnDevice( const IndexType idx1, const IndexType idx2, const RealType* data1, const RealType* data2 ) const { return data1[ idx1 ] + data1[ idx2 ]; } __device__ RealType initialValueOnDevice( const IndexType idx1, const RealType* data1, const RealType* data2 ) const { return data1[ idx1 ]; }; __device__ RealType firstReductionOnDevice( const IndexType idx1, const IndexType idx2, const IndexType idx3, const RealType* data1, const RealType* data2, const RealType* data3 ) const { return data1[ idx1 ] + data2[ idx2 ] + data2[ idx3 ]; }; __device__ RealType firstReductionOnDevice( const IndexType idx1, const IndexType idx2, const RealType* data1, const RealType* data2, const RealType* data3 ) const { return data1[ idx1 ] + data2[ idx2 ]; }; __device__ RealType commonReductionOnDevice( const IndexType idx1, const IndexType idx2, const RealType* data ) const { return data[ idx1 ] + data[ idx2 ]; }; }; template< typename Real, typename Index > class tnlParallelReductionMin { public: typedef Real RealType; typedef Index IndexType; RealType reduceOnHost( const RealType& data1, const RealType& data2 ) const { return Min( data1, data2 ); } __device__ RealType initialValueOnDevice( const IndexType idx1, const IndexType idx2, const RealType* data1, const RealType* data2 ) const { return tnlCudaMin( data1[ idx1 ], data1[ idx2 ] ); } __device__ RealType initialValueOnDevice( const IndexType idx1, const RealType* data1, const RealType* data2 ) const { return data1[ idx1 ]; }; __device__ RealType firstReductionOnDevice( const IndexType idx1, const IndexType idx2, const IndexType idx3, const RealType* data1, const RealType* data2, const RealType* data3 ) const { return tnlCudaMin( data1[ idx1 ], tnlCudaMin( data2[ idx2 ], data2[ idx3 ] ) ); }; __device__ RealType firstReductionOnDevice( const IndexType idx1, const IndexType idx2, const RealType* data1, const RealType* data2, const RealType* data3 ) const { return tnlCudaMin( data1[ idx1 ], data2[ idx2 ] ); }; __device__ RealType commonReductionOnDevice( const IndexType idx1, const IndexType idx2, const RealType* data ) const { return tnlCudaMin( data[ idx1 ], data[ idx2 ] ); }; }; #include <implementation/core/cuda/reduction-operations_impl.h> #endif #endif /* REDUCTION_OPERATIONS_H_ */
src/implementation/core/CMakeLists.txt +3 −1 Original line number Diff line number Diff line Loading @@ -21,8 +21,10 @@ SET( headers cuda-long-vector-kernels.h SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/core ) IF( BUILD_CUDA ) set( tnl_implementation_core_CUDA__SOURCES ${tnl_implementation_core_cuda_CUDA__SOURCES} ${CURRENT_DIR}/tnlArray_impl.cu ${CURRENT_DIR}/tnlVector_impl.cu ) ${CURRENT_DIR}/tnlVector_impl.cu PARENT_SCOPE ) ENDIF() set( tnl_implementation_core_SOURCES ${tnl_implementation_core_CUDA__SOURCES} Loading
src/implementation/core/cuda-long-vector-kernels.h +137 −203 File changed.Preview size limit exceeded, changes collapsed. Show changes
src/implementation/core/cuda/CMakeLists.txt +8 −1 Original line number Diff line number Diff line SET( headers reduction_impl.h ) SET( headers reduction_impl.h reduction-operations_impl.h ) SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/core/cuda ) IF( BUILD_CUDA ) set( tnl_implementation_core_cuda_CUDA__SOURCES ${CURRENT_DIR}/reduction-operations_impl.cu PARENT_SCOPE ) endif() set( tnl_implementation_core_cuda_SOURCES ${CURRENT_DIR}/device-check.cpp PARENT_SCOPE ) Loading
src/implementation/core/cuda/reduction-operations_impl.cu 0 → 100644 +16 −0 Original line number Diff line number Diff line /*************************************************************************** reduction-operations.cu - description ------------------- begin : Mar 22, 2013 copyright : (C) 2013 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * ***************************************************************************/ No newline at end of file