Loading src/TNL/Containers/Algorithms/CudaReductionKernel.h +7 −7 Original line number Diff line number Diff line Loading @@ -57,9 +57,9 @@ CudaReductionKernel( Operation operation, * gridSize is the number of element processed by all blocks at the * same time. */ IndexType tid = threadIdx. x; const IndexType tid = threadIdx.x; IndexType gid = blockIdx.x * blockDim. x + threadIdx.x; IndexType gridSize = blockDim. x * gridDim.x; const IndexType gridSize = blockDim.x * gridDim.x; sdata[ tid ] = operation.initialValue(); /*** Loading src/TNL/Containers/Algorithms/Multireduction_impl.h +7 −7 Original line number Diff line number Diff line Loading @@ -103,7 +103,7 @@ reduce( Operation& operation, deviceAux1 ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); cout << " Multireduction of " << n << " datasets on GPU to size " << reducedSize << " took " << timer.getRealTime() << " sec. " << endl; std::cout << " Multireduction of " << n << " datasets on GPU to size " << reducedSize << " took " << timer.getRealTime() << " sec. " << std::endl; timer.reset(); timer.start(); #endif Loading @@ -117,18 +117,18 @@ reduce( Operation& operation, #ifdef CUDA_REDUCTION_PROFILING timer.stop(); cout << " Transferring data to CPU took " << timer.getRealTime() << " sec. " << endl; std::cout << " Transferring data to CPU took " << timer.getRealTime() << " sec. " << std::endl; timer.reset(); timer.start(); #endif // cout << "resultArray = ["; // std::cout << "resultArray = ["; // for( int i = 0; i < n * reducedSize; i++ ) { // cout << resultArray[ i ]; // std::cout << resultArray[ i ]; // if( i < n * reducedSize - 1 ) // cout << ", "; // std::cout << ", "; // } // cout << "]" << endl; // std::cout << "]" << std::endl; /*** * Reduce the data on the host system. Loading @@ -138,7 +138,7 @@ reduce( Operation& operation, #ifdef CUDA_REDUCTION_PROFILING timer.stop(); cout << " Multireduction of small data set on CPU took " << timer.getRealTime() << " sec. " << endl; std::cout << " Multireduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl; #endif return checkCudaDevice; Loading src/TNL/Containers/Algorithms/Reduction_impl.h +0 −3 Original line number Diff line number Diff line Loading @@ -36,9 +36,6 @@ namespace Algorithms { */ const int minGPUReductionDataSize = 256;//65536; //16384;//1024;//256; #ifdef HAVE_CUDA #endif template< typename Operation > bool reductionOnCudaDevice( Operation& operation, Loading Loading
src/TNL/Containers/Algorithms/CudaReductionKernel.h +7 −7 Original line number Diff line number Diff line Loading @@ -57,9 +57,9 @@ CudaReductionKernel( Operation operation, * gridSize is the number of element processed by all blocks at the * same time. */ IndexType tid = threadIdx. x; const IndexType tid = threadIdx.x; IndexType gid = blockIdx.x * blockDim. x + threadIdx.x; IndexType gridSize = blockDim. x * gridDim.x; const IndexType gridSize = blockDim.x * gridDim.x; sdata[ tid ] = operation.initialValue(); /*** Loading
src/TNL/Containers/Algorithms/Multireduction_impl.h +7 −7 Original line number Diff line number Diff line Loading @@ -103,7 +103,7 @@ reduce( Operation& operation, deviceAux1 ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); cout << " Multireduction of " << n << " datasets on GPU to size " << reducedSize << " took " << timer.getRealTime() << " sec. " << endl; std::cout << " Multireduction of " << n << " datasets on GPU to size " << reducedSize << " took " << timer.getRealTime() << " sec. " << std::endl; timer.reset(); timer.start(); #endif Loading @@ -117,18 +117,18 @@ reduce( Operation& operation, #ifdef CUDA_REDUCTION_PROFILING timer.stop(); cout << " Transferring data to CPU took " << timer.getRealTime() << " sec. " << endl; std::cout << " Transferring data to CPU took " << timer.getRealTime() << " sec. " << std::endl; timer.reset(); timer.start(); #endif // cout << "resultArray = ["; // std::cout << "resultArray = ["; // for( int i = 0; i < n * reducedSize; i++ ) { // cout << resultArray[ i ]; // std::cout << resultArray[ i ]; // if( i < n * reducedSize - 1 ) // cout << ", "; // std::cout << ", "; // } // cout << "]" << endl; // std::cout << "]" << std::endl; /*** * Reduce the data on the host system. Loading @@ -138,7 +138,7 @@ reduce( Operation& operation, #ifdef CUDA_REDUCTION_PROFILING timer.stop(); cout << " Multireduction of small data set on CPU took " << timer.getRealTime() << " sec. " << endl; std::cout << " Multireduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl; #endif return checkCudaDevice; Loading
src/TNL/Containers/Algorithms/Reduction_impl.h +0 −3 Original line number Diff line number Diff line Loading @@ -36,9 +36,6 @@ namespace Algorithms { */ const int minGPUReductionDataSize = 256;//65536; //16384;//1024;//256; #ifdef HAVE_CUDA #endif template< typename Operation > bool reductionOnCudaDevice( Operation& operation, Loading