Commit 81dbf195 authored by Tomáš Oberhuber
Browse files

Merge branch 'master' into functions

Conflicts:
	build
parents 47f7ff04 35fafc0a
Loading
Loading
Loading
Loading
+26 −25
Original line number Diff line number Diff line
@@ -29,17 +29,18 @@ do
        --with-cuda=*                    ) WITH_CUDA="${option#*=}" ;;
        --with-cublas=*                  ) WITH_CUBLAS="${option#*=}" ;;
        --with-cuda-arch=*               ) WITH_CUDA_ARCH="${option#*=}";;
        --with-templates-instantiation ) WITH_TEMPLATE_INSTANTIATION="${option#*=}" ;;
        --with-templates-instantiation=* ) WITH_TEMPLATE_INSTANTIATION="${option#*=}" ;;
        --instantiate-long-int=*         ) INSTANTIATE_LONG_INT="${option#*=}" ;;
        --instantiate-int=*              ) INSTANTIATE_INT="${option#*=}" ;;
        --instantiate-long-double=*      ) INSTANTIATE_LONG_DOUBLE="${option#*=}" ;;
        --instantiate-double=*           ) INSTANTIATE_DOUBLE="${option#*=}" ;;
        --instantiate-float=*            ) INSTANTIATE_FLOAT="${option#*=}" ;;
        --full-build                   ) INSTANTIATE_LONG_INT="yes"
        --fast-build                     ) INSTANTIATE_LONG_INT="no"
                                           INSTANTIATE_INT="yes"
                                         INSTANTIATE_LONG_DOUBLE="yes"
                                           INSTANTIATE_LONG_DOUBLE="no"
                                           INSTANTIATE_DOUBLE="yes"
                                         INSTANTIATE_FLOAT="yes";;
                                           INSTANTIATE_FLOAT="no"
                                           WITH_CUDA_ARCH="auto" ;;
        --with-cmake=*                   ) CMAKE="${option#*=}" ;;
        --build-jobs=*                   ) BUILD_JOBS="${option#*=}" ;;
        --cmake-only=*                   ) CMAKE_ONLY="${option#*=}" ;;
+5 −2
Original line number Diff line number Diff line
@@ -256,6 +256,9 @@ bool tnlArrayOperations< tnlHost, tnlCuda >::compareMemory( const Element1* dest
                                                            const Element2* source,
                                                            const Index size )
{
   /***
    * Here, destination is on host and source is on CUDA device.
    */
   tnlAssert( destination, );
   tnlAssert( source, );
   tnlAssert( size >= 0, cerr << "size = " << size );
@@ -280,7 +283,7 @@ bool tnlArrayOperations< tnlHost, tnlCuda >::compareMemory( const Element1* dest
         delete[] host_buffer;
         return false;
      }
      if( ! tnlArrayOperations< tnlHost >::compareMemory( host_buffer, destination, transfer ) )
      if( ! tnlArrayOperations< tnlHost >::compareMemory( &destination[ compared ], host_buffer, transfer ) )
      {
         delete[] host_buffer;
         return false;
+3 −1
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@ IF( BUILD_CUDA )
        ${CURRENT_DIR}/cuda-reduction-abs-max_impl.cu
        ${CURRENT_DIR}/cuda-reduction-and_impl.cu
        ${CURRENT_DIR}/cuda-reduction-or_impl.cu
        ${CURRENT_DIR}/cuda-reduction-l2-norm_impl.cu
        ${CURRENT_DIR}/cuda-reduction-lp-norm_impl.cu
        ${CURRENT_DIR}/cuda-reduction-equalities_impl.cu
        ${CURRENT_DIR}/cuda-reduction-inequalities_impl.cu
@@ -30,6 +31,7 @@ IF( BUILD_CUDA )
        ${CURRENT_DIR}/cuda-reduction-diff-abs-sum_impl.cu
        ${CURRENT_DIR}/cuda-reduction-diff-abs-min_impl.cu
        ${CURRENT_DIR}/cuda-reduction-diff-abs-max_impl.cu
        ${CURRENT_DIR}/cuda-reduction-diff-l2-norm_impl.cu        
        ${CURRENT_DIR}/cuda-reduction-diff-lp-norm_impl.cu        
        ${CURRENT_DIR}/cuda-prefix-sum_impl.cu
        PARENT_SCOPE ) 
+8 −8
Original line number Diff line number Diff line
@@ -23,7 +23,7 @@ template bool cudaPrefixSum( const int size,
                             const int blockSize,
                             const int *deviceInput,
                             int* deviceOutput,
                             const tnlParallelReductionSum< int, int >& operation,
                             tnlParallelReductionSum< int, int >& operation,
                             const enumPrefixSumType prefixSumType );


@@ -32,7 +32,7 @@ template bool cudaPrefixSum( const int size,
                             const int blockSize,
                             const float *deviceInput,
                             float* deviceOutput,
                             const tnlParallelReductionSum< float, int >& operation,
                             tnlParallelReductionSum< float, int >& operation,
                             const enumPrefixSumType prefixSumType );
#endif

@@ -40,7 +40,7 @@ template bool cudaPrefixSum( const int size,
                             const int blockSize,
                             const double *deviceInput,
                             double* deviceOutput,
                             const tnlParallelReductionSum< double, int >& operation,
                             tnlParallelReductionSum< double, int >& operation,
                             const enumPrefixSumType prefixSumType );

#ifdef INSTANTIATE_LONG_DOUBLE
@@ -48,7 +48,7 @@ template bool cudaPrefixSum( const int size,
                             const int blockSize,
                             const long double *deviceInput,
                             long double* deviceOutput,
                             const tnlParallelReductionSum< long double, int >& operation,
                             tnlParallelReductionSum< long double, int >& operation,
                             const enumPrefixSumType prefixSumType );
#endif

@@ -57,7 +57,7 @@ template bool cudaPrefixSum( const long int size,
                             const long int blockSize,
                             const int *deviceInput,
                             int* deviceOutput,
                             const tnlParallelReductionSum< int, long int >& operation,
                             tnlParallelReductionSum< int, long int >& operation,
                             const enumPrefixSumType prefixSumType );


@@ -66,7 +66,7 @@ template bool cudaPrefixSum( const long int size,
                             const long int blockSize,
                             const float *deviceInput,
                             float* deviceOutput,
                             const tnlParallelReductionSum< float, long int >& operation,
                             tnlParallelReductionSum< float, long int >& operation,
                             const enumPrefixSumType prefixSumType );
#endif

@@ -74,7 +74,7 @@ template bool cudaPrefixSum( const long int size,
                             const long int blockSize,
                             const double *deviceInput,
                             double* deviceOutput,
                             const tnlParallelReductionSum< double, long int >& operation,
                             tnlParallelReductionSum< double, long int >& operation,
                             const enumPrefixSumType prefixSumType );

#ifdef INSTANTIATE_LONG_DOUBLE
@@ -82,7 +82,7 @@ template bool cudaPrefixSum( const long int size,
                             const long int blockSize,
                             const long double *deviceInput,
                             long double* deviceOutput,
                             const tnlParallelReductionSum< long double, long int >& operation,
                             tnlParallelReductionSum< long double, long int >& operation,
                             const enumPrefixSumType prefixSumType );
#endif
#endif 
+13 −13
Original line number Diff line number Diff line
@@ -30,7 +30,7 @@ template< typename DataType,
          typename Operation,
          typename Index >
__global__ void cudaFirstPhaseBlockPrefixSum( const enumPrefixSumType prefixSumType,
                                              const Operation operation,
                                              Operation operation,
                                              const Index size,
                                              const Index elementsInBlock,
                                              const DataType* input,
@@ -159,7 +159,7 @@ __global__ void cudaFirstPhaseBlockPrefixSum( const enumPrefixSumType prefixSumT
template< typename DataType,
          typename Operation,
          typename Index >
__global__ void cudaSecondPhaseBlockPrefixSum( const Operation operation,
__global__ void cudaSecondPhaseBlockPrefixSum( Operation operation,
                                               const Index size,
                                               const Index elementsInBlock,
                                               const Index gridShift,
@@ -186,7 +186,7 @@ template< typename DataType,
          typename Operation,
          typename Index >
bool cudaRecursivePrefixSum( const enumPrefixSumType prefixSumType,
                             const Operation& operation,
                             Operation& operation,
                             const Index size,
                             const Index blockSize,
                             const Index elementsInBlock,
@@ -276,7 +276,7 @@ template< typename DataType,
          typename Operation,
          typename Index >
bool cudaGridPrefixSum( enumPrefixSumType prefixSumType,
                        const Operation& operation,
                        Operation& operation,
                        const Index size,
                        const Index blockSize,
                        const Index elementsInBlock,
@@ -313,7 +313,7 @@ bool cudaPrefixSum( const Index size,
                    const Index blockSize,
                    const DataType *deviceInput,
                    DataType* deviceOutput,
                    const Operation& operation,
                    Operation& operation,
                    const enumPrefixSumType prefixSumType )
{
   /****
@@ -361,7 +361,7 @@ extern template bool cudaPrefixSum( const int size,
                                    const int blockSize,
                                    const int *deviceInput,
                                    int* deviceOutput,
                                    const tnlParallelReductionSum< int, int >& operation,
                                    tnlParallelReductionSum< int, int >& operation,
                                    const enumPrefixSumType prefixSumType );


@@ -369,14 +369,14 @@ extern template bool cudaPrefixSum( const int size,
                                    const int blockSize,
                                    const float *deviceInput,
                                    float* deviceOutput,
                                    const tnlParallelReductionSum< float, int >& operation,
                                    tnlParallelReductionSum< float, int >& operation,
                                    const enumPrefixSumType prefixSumType );

extern template bool cudaPrefixSum( const int size,
                                    const int blockSize,
                                    const double *deviceInput,
                                    double* deviceOutput,
                                    const tnlParallelReductionSum< double, int >& operation,
                                    tnlParallelReductionSum< double, int >& operation,
                                    const enumPrefixSumType prefixSumType );

#ifdef INSTANTIATE_LONG_DOUBLE
@@ -384,7 +384,7 @@ extern template bool cudaPrefixSum( const int size,
                                    const int blockSize,
                                    const long double *deviceInput,
                                    long double* deviceOutput,
                                    const tnlParallelReductionSum< long double, int >& operation,
                                    tnlParallelReductionSum< long double, int >& operation,
                                    const enumPrefixSumType prefixSumType );
#endif

@@ -393,7 +393,7 @@ extern template bool cudaPrefixSum( const long int size,
                                    const long int blockSize,
                                    const int *deviceInput,
                                    int* deviceOutput,
                                    const tnlParallelReductionSum< int, long int >& operation,
                                    tnlParallelReductionSum< int, long int >& operation,
                                    const enumPrefixSumType prefixSumType );


@@ -401,14 +401,14 @@ extern template bool cudaPrefixSum( const long int size,
                                    const long int blockSize,
                                    const float *deviceInput,
                                    float* deviceOutput,
                                    const tnlParallelReductionSum< float, long int >& operation,
                                    tnlParallelReductionSum< float, long int >& operation,
                                    const enumPrefixSumType prefixSumType );

extern template bool cudaPrefixSum( const long int size,
                                    const long int blockSize,
                                    const double *deviceInput,
                                    double* deviceOutput,
                                    const tnlParallelReductionSum< double, long int >& operation,
                                    tnlParallelReductionSum< double, long int >& operation,
                                    const enumPrefixSumType prefixSumType );

#ifdef INSTANTIATE_LONG_DOUBLE
@@ -416,7 +416,7 @@ extern template bool cudaPrefixSum( const long int size,
                                    const long int blockSize,
                                    const long double *deviceInput,
                                    long double* deviceOutput,
                                    const tnlParallelReductionSum< long double, long int >& operation,
                                    tnlParallelReductionSum< long double, long int >& operation,
                                    const enumPrefixSumType prefixSumType );
#endif
#endif
Loading