Commit 1fe62640 authored by Jakub Klinkovský
Browse files

Replaced static member variables in CudaPrefixSumKernelLauncher with static getters

parent af6d1d6b
Loading
Loading
Loading
Loading
+17 −24
Original line number Diff line number Diff line
@@ -297,8 +297,7 @@ struct CudaPrefixSumKernelLauncher
       */
      const Index elementsInBlock = 8 * blockSize;
      const Index numberOfBlocks = roundUpDivision( size, elementsInBlock );
      //const auto maxGridSize = 3; //Devices::Cuda::getMaxGridSize();
      const Index numberOfGrids = Devices::Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize );
      const Index numberOfGrids = Devices::Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize() );
      Real gridShift = zero;
      //std::cerr << "numberOfgrids =  " << numberOfGrids << std::endl;

@@ -310,10 +309,10 @@ struct CudaPrefixSumKernelLauncher
         /****
          * Compute current grid size and size of data to be scanned
          */
         const Index gridOffset = gridIdx * maxGridSize * elementsInBlock;
         const Index gridOffset = gridIdx * maxGridSize() * elementsInBlock;
         Index currentSize = size - gridOffset;
         if( currentSize / elementsInBlock > maxGridSize )
            currentSize = maxGridSize * elementsInBlock;
         if( currentSize / elementsInBlock > maxGridSize() )
            currentSize = maxGridSize() * elementsInBlock;

         //std::cerr << "GridIdx = " << gridIdx << " grid size = " << currentSize << std::endl;
         cudaRecursivePrefixSum( prefixSumType,
@@ -331,37 +330,31 @@ struct CudaPrefixSumKernelLauncher
       * Store the number of CUDA grids for the purpose of unit testing, i.e.
       * to check if we test the algorithm with more than one CUDA grid.
       */
      gridsCount = numberOfGrids;
      gridsCount() = numberOfGrids;
   }

   /****
    * The following serves for setting smaller maxGridSize so that we can force
    * the prefix sum in CUDA to run with more than one grid in unit tests.
    */
   static void setMaxGridSize( int newMaxGridSize ) {
      maxGridSize = newMaxGridSize;
   static int& maxGridSize()
   {
      static int maxGridSize = Devices::Cuda::getMaxGridSize();
      return maxGridSize;
   }

   static void resetMaxGridSize() {
      maxGridSize = Devices::Cuda::getMaxGridSize();
   static void resetMaxGridSize()
   {
      maxGridSize() = Devices::Cuda::getMaxGridSize();
   }

   static int maxGridSize;

   static int gridsCount;
   static int& gridsCount()
   {
      static int gridsCount = -1;
      return gridsCount;
   }
};

template< PrefixSumType prefixSumType,
          typename Real,
          typename Index >
int CudaPrefixSumKernelLauncher< prefixSumType, Real, Index >::maxGridSize = Devices::Cuda::getMaxGridSize();

template< PrefixSumType prefixSumType,
          typename Real,
          typename Index >
int CudaPrefixSumKernelLauncher< prefixSumType, Real, Index >::gridsCount = -1;


#endif

} // namespace Algorithms
+7 −5
Original line number Diff line number Diff line
@@ -113,17 +113,19 @@ public:
   }

   protected:
      static bool& ompEnabled() {
      static bool& ompEnabled()
      {
#ifdef HAVE_OPENMP
         static bool ompEnabled( true );
         static bool ompEnabled = true;
#else
         static bool ompEnabled( false );
         static bool ompEnabled = false;
#endif
         return ompEnabled;
      }

      static int& maxThreadsCount() {
         static int maxThreadsCount( -1 );
      static int& maxThreadsCount()
      {
         static int maxThreadsCount = -1;
         return maxThreadsCount;
      }
};
+13 −13
Original line number Diff line number Diff line
@@ -74,11 +74,11 @@ TYPED_TEST( VectorTest, prefixSum )
   if( std::is_same< DeviceType, Devices::Cuda >::value )
   {
#ifdef HAVE_CUDA
      Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::setMaxGridSize( 3 );
      Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::maxGridSize() = 3;
      v = 0;
      v_host = -1;
      v.prefixSum();
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1  );
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
      v_host = v;
      for( int i = 0; i < size; i++ )
         EXPECT_EQ( v_host[ i ], 0 );
@@ -86,7 +86,7 @@ TYPED_TEST( VectorTest, prefixSum )
      setLinearSequence( v );
      v_host = -1;
      v.prefixSum();
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1  );
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
      v_host = v;
      for( int i = 1; i < size; i++ )
         EXPECT_EQ( v_host[ i ] - v_host[ i - 1 ], i );
@@ -94,7 +94,7 @@ TYPED_TEST( VectorTest, prefixSum )
      setConstantSequence( v, 1 );
      v_host = -1;
      v_view.prefixSum();
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1  );
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
      v_host = v_view;
      for( int i = 0; i < size; i++ )
         EXPECT_EQ( v_host[ i ], i + 1 );
@@ -102,7 +102,7 @@ TYPED_TEST( VectorTest, prefixSum )
      v = 0;
      v_host = -1;
      v_view.prefixSum();
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1  );
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
      v_host = v_view;
      for( int i = 0; i < size; i++ )
         EXPECT_EQ( v_host[ i ], 0 );
@@ -110,7 +110,7 @@ TYPED_TEST( VectorTest, prefixSum )
      setLinearSequence( v );
      v_host = -1;
      v_view.prefixSum();
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1  );
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
      v_host = v_view;
      for( int i = 1; i < size; i++ )
         EXPECT_EQ( v_host[ i ] - v_host[ i - 1 ], i );
@@ -184,12 +184,12 @@ TYPED_TEST( VectorTest, exclusivePrefixSum )
   if( std::is_same< DeviceType, Devices::Cuda >::value )
   {
#ifdef HAVE_CUDA
      Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::setMaxGridSize( 3 );
      Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::maxGridSize() = 3;

      setConstantSequence( v, 1 );
      v_host = -1;
      v.template prefixSum< Algorithms::PrefixSumType::Exclusive >();
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 );
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
      v_host = v;
      for( int i = 0; i < size; i++ )
         EXPECT_EQ( v_host[ i ], i );
@@ -197,7 +197,7 @@ TYPED_TEST( VectorTest, exclusivePrefixSum )
      v.setValue( 0 );
      v_host = -1;
      v.template prefixSum< Algorithms::PrefixSumType::Exclusive >();
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 );
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
      v_host = v;
      for( int i = 0; i < size; i++ )
         EXPECT_EQ( v_host[ i ], 0 );
@@ -205,7 +205,7 @@ TYPED_TEST( VectorTest, exclusivePrefixSum )
      setLinearSequence( v );
      v_host = -1;
      v.template prefixSum< Algorithms::PrefixSumType::Exclusive >();
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 );
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
      v_host = v;
      for( int i = 1; i < size; i++ )
         EXPECT_EQ( v_host[ i ] - v_host[ i - 1 ], i - 1 );
@@ -213,7 +213,7 @@ TYPED_TEST( VectorTest, exclusivePrefixSum )
      setConstantSequence( v, 1 );
      v_host = -1;
      v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >();
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 );
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
      v_host = v_view;
      for( int i = 0; i < size; i++ )
         EXPECT_EQ( v_host[ i ], i );
@@ -221,7 +221,7 @@ TYPED_TEST( VectorTest, exclusivePrefixSum )
      v.setValue( 0 );
      v_host = -1;
      v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >();
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 );
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
      v_host = v_view;
      for( int i = 0; i < size; i++ )
         EXPECT_EQ( v_host[ i ], 0 );
@@ -229,7 +229,7 @@ TYPED_TEST( VectorTest, exclusivePrefixSum )
      setLinearSequence( v );
      v_host = -1;
      v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >();
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 );
      EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
      v_host = v_view;
      for( int i = 1; i < size; i++ )
         EXPECT_EQ( v_host[ i ] - v_host[ i - 1 ], i - 1 );