Commit 185599c9 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Fixing CUDA max grid size.

parent 364fb421
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -56,7 +56,7 @@ segmentsReductionCSRAdaptiveKernel( BlocksView blocks,
   __shared__ Real multivectorShared[ CudaBlockSize / WarpSize ];
   //__shared__ BlockType sharedBlocks[ WarpsCount ];

   const Index index = ( ( gridIdx * TNL::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x ) + threadIdx.x;
   const Index index = ( ( gridIdx * TNL::Cuda::getMaxGridXSize() + blockIdx.x ) * blockDim.x ) + threadIdx.x;
   const Index blockIdx = index / WarpSize;
   if( blockIdx >= blocks.getSize() - 1 )
      return;
@@ -237,8 +237,8 @@ struct CSRAdaptiveKernelSegmentsReductionDispatcher< Index, Device, Fetch, Reduc

      Index blocksCount;

      const Index threads = detail::CSRAdaptiveKernelParameters< sizeof( Real ) >::CudaBlockSize();
      constexpr size_t maxGridSize = TNL::Cuda::getMaxGridSize();
      const Index threads = details::CSRAdaptiveKernelParameters< sizeof( Real ) >::CudaBlockSize();
      constexpr size_t maxGridSize = TNL::Cuda::getMaxGridXSize();

      // Fill blocks
      size_t neededThreads = blocks.getSize() * TNL::Cuda::getWarpSize(); // one warp per block
+15 −0
Original line number Diff line number Diff line
@@ -22,6 +22,21 @@ inline constexpr std::size_t getMaxGridSize()
   return 65535;
}

/**
 * \brief Returns the largest number of blocks allowed in the x-dimension of a CUDA grid.
 *
 * The value is 2147483647 (2^31 - 1). An earlier revision of this function
 * returned 65535 here, which is the same bound that is used for the y- and
 * z-dimensions.
 *
 * NOTE(review): 2^31 - 1 is the documented x-dimension limit for devices of
 * compute capability 3.0 and newer; older devices are limited to 65535 —
 * confirm that no such device has to be supported.
 *
 * \return Compile-time constant 2147483647.
 */
inline constexpr std::size_t getMaxGridXSize()
{
   return 2147483647;
}

/**
 * \brief Returns the largest number of blocks allowed in the y-dimension of a CUDA grid.
 *
 * \return Compile-time constant 65535.
 */
inline constexpr std::size_t getMaxGridYSize()
{
   return 65535;
}

/**
 * \brief Returns the largest number of blocks allowed in the z-dimension of a CUDA grid.
 *
 * \return Compile-time constant 65535.
 */
inline constexpr std::size_t getMaxGridZSize()
{
   return 65535;
}

inline constexpr int getMaxBlockSize()
{
   return 1024;