From 99b90ba46b4b69034fe051d37aaf9a14b302391d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com> Date: Wed, 10 Feb 2021 13:27:22 +0100 Subject: [PATCH] Deleting legacy constants in CSRAdaptiveKernel. --- .../Algorithms/Segments/CSRAdaptiveKernel.h | 21 ------------------- .../Algorithms/Segments/CSRAdaptiveKernel.hpp | 2 +- .../details/CSRAdaptiveKernelParameters.h | 2 +- 3 files changed, 2 insertions(+), 23 deletions(-) diff --git a/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.h b/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.h index 5ade54d02e..22cf447ecb 100644 --- a/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.h +++ b/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.h @@ -67,27 +67,6 @@ struct CSRAdaptiveKernel static TNL::String getKernelType(); - - static constexpr Index THREADS_ADAPTIVE = details::CSRAdaptiveKernelParameters< sizeof( Index ) >::CudaBlockSize(); //sizeof(Index) == 8 ? 128 : 256; - - // How many shared memory use per block in CSR Adaptive kernel - static constexpr Index SHARED_PER_BLOCK = details::CSRAdaptiveKernelParameters< sizeof( Index ) >::StreamedSharedMemory(); //20000; //24576; TODO: - - // Number of elements in shared memory - static constexpr Index SHARED = SHARED_PER_BLOCK/sizeof(double); - - // Number of warps in block for CSR Adaptive - static constexpr Index WARPS = THREADS_ADAPTIVE / 32; - - // Number of elements in shared memory per one warp - static constexpr Index SHARED_PER_WARP = SHARED / WARPS; - - // Max length of row to process one warp for CSR Light, MultiVector - static constexpr Index MAX_ELEMENTS_PER_WARP = 384; - - // Max length of row to process one warp for CSR Adaptive - static constexpr Index MAX_ELEMENTS_PER_WARP_ADAPT = details::CSRAdaptiveKernelParameters< sizeof( Index ) >::MaxAdaptiveElementsPerWarp(); - template< typename Offsets > void init( const Offsets& offsets ); diff --git a/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.hpp b/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.hpp index ff2db147be..13c653c6c5 100644 --- a/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.hpp +++ b/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.hpp @@ -165,7 +165,7 @@ initValueSize( const Offsets& offsets ) if( type == details::Type::LONG ) { const Index blocksCount = inBlocks.size(); - const Index warpsPerCudaBlock = THREADS_ADAPTIVE / TNL::Cuda::getWarpSize(); + const Index warpsPerCudaBlock = details::CSRAdaptiveKernelParameters< sizeof( Index ) >::CudaBlockSize() / TNL::Cuda::getWarpSize(); Index warpsLeft = roundUpDivision( blocksCount, warpsPerCudaBlock ) * warpsPerCudaBlock - blocksCount; if( warpsLeft == 0 ) warpsLeft = warpsPerCudaBlock; diff --git a/src/TNL/Algorithms/Segments/details/CSRAdaptiveKernelParameters.h b/src/TNL/Algorithms/Segments/details/CSRAdaptiveKernelParameters.h index 2546580f8f..56f203a74b 100644 --- a/src/TNL/Algorithms/Segments/details/CSRAdaptiveKernelParameters.h +++ b/src/TNL/Algorithms/Segments/details/CSRAdaptiveKernelParameters.h @@ -24,7 +24,7 @@ struct CSRAdaptiveKernelParameters * * \return CUDA block size. */ - static constexpr int CudaBlockSize() { return 256; }; //sizeof( Value ) == 8 ? 128 : 256; }; + static constexpr int CudaBlockSize() { return 128; }; //sizeof( Value ) == 8 ? 128 : 256; }; //std::max( ( int ) ( 1024 / sizeof( Value ) ), ( int ) Cuda::getWarpSize() ); }; /** -- GitLab