Loading src/TNL/Algorithms/Segments/BiEllpackView.hpp +8 −7 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ #include <TNL/Algorithms/Segments/BiEllpackView.h> #include <TNL/Algorithms/Segments/details/LambdaAdapter.h> //#include <TNL/Algorithms/Segments/details/BiEllpack.h> #include <TNL/Cuda/SharedMemory.h> namespace TNL { namespace Algorithms { Loading src/TNL/Algorithms/Segments/ChunkedEllpackView.hpp +1 −0 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ #include <TNL/Algorithms/Segments/ChunkedEllpackView.h> #include <TNL/Algorithms/Segments/details/LambdaAdapter.h> //#include <TNL/Algorithms/Segments/details/ChunkedEllpack.h> #include <TNL/Cuda/SharedMemory.h> namespace TNL { namespace Algorithms { Loading src/TNL/Algorithms/detail/CudaScanKernel.h +0 −5 Original line number Diff line number Diff line Loading @@ -10,8 +10,6 @@ #pragma once #include <iostream> #include <TNL/Math.h> #include <TNL/Cuda/SharedMemory.h> #include <TNL/Exceptions/CudaBadAlloc.h> Loading Loading @@ -246,7 +244,6 @@ struct CudaScanKernelLauncher const int elementsInBlock = 8 * blockSize; const Index numberOfBlocks = roundUpDivision( size, elementsInBlock ); const Index numberOfGrids = Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize() ); //std::cerr << "numberOfgrids = " << numberOfGrids << std::endl; // allocate array for the block results Containers::Array< Real, Devices::Cuda > blockResults; Loading @@ -260,7 +257,6 @@ struct CudaScanKernelLauncher Index currentSize = size - gridOffset; if( currentSize / elementsInBlock > maxGridSize() ) currentSize = maxGridSize() * elementsInBlock; //std::cerr << "GridIdx = " << gridIdx << " grid size = " << currentSize << std::endl; // setup block and grid size dim3 cudaBlockSize, cudaGridSize; Loading Loading @@ -343,7 +339,6 @@ struct CudaScanKernelLauncher Index currentSize = size - gridOffset; if( currentSize / elementsInBlock > maxGridSize() ) currentSize = maxGridSize() * elementsInBlock; //std::cerr << "GridIdx = " << gridIdx << " grid size = " << currentSize << std::endl; // setup block and grid size dim3 cudaBlockSize, cudaGridSize; Loading Loading
src/TNL/Algorithms/Segments/BiEllpackView.hpp +8 −7 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ #include <TNL/Algorithms/Segments/BiEllpackView.h> #include <TNL/Algorithms/Segments/details/LambdaAdapter.h> //#include <TNL/Algorithms/Segments/details/BiEllpack.h> #include <TNL/Cuda/SharedMemory.h> namespace TNL { namespace Algorithms { Loading
src/TNL/Algorithms/Segments/ChunkedEllpackView.hpp +1 −0 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ #include <TNL/Algorithms/Segments/ChunkedEllpackView.h> #include <TNL/Algorithms/Segments/details/LambdaAdapter.h> //#include <TNL/Algorithms/Segments/details/ChunkedEllpack.h> #include <TNL/Cuda/SharedMemory.h> namespace TNL { namespace Algorithms { Loading
src/TNL/Algorithms/detail/CudaScanKernel.h +0 −5 Original line number Diff line number Diff line Loading @@ -10,8 +10,6 @@ #pragma once #include <iostream> #include <TNL/Math.h> #include <TNL/Cuda/SharedMemory.h> #include <TNL/Exceptions/CudaBadAlloc.h> Loading Loading @@ -246,7 +244,6 @@ struct CudaScanKernelLauncher const int elementsInBlock = 8 * blockSize; const Index numberOfBlocks = roundUpDivision( size, elementsInBlock ); const Index numberOfGrids = Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize() ); //std::cerr << "numberOfgrids = " << numberOfGrids << std::endl; // allocate array for the block results Containers::Array< Real, Devices::Cuda > blockResults; Loading @@ -260,7 +257,6 @@ struct CudaScanKernelLauncher Index currentSize = size - gridOffset; if( currentSize / elementsInBlock > maxGridSize() ) currentSize = maxGridSize() * elementsInBlock; //std::cerr << "GridIdx = " << gridIdx << " grid size = " << currentSize << std::endl; // setup block and grid size dim3 cudaBlockSize, cudaGridSize; Loading Loading @@ -343,7 +339,6 @@ struct CudaScanKernelLauncher Index currentSize = size - gridOffset; if( currentSize / elementsInBlock > maxGridSize() ) currentSize = maxGridSize() * elementsInBlock; //std::cerr << "GridIdx = " << gridIdx << " grid size = " << currentSize << std::endl; // setup block and grid size dim3 cudaBlockSize, cudaGridSize; Loading