From 0ef6688575b2474cda5bec9349810a34390572fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com> Date: Sun, 14 Apr 2019 10:36:43 +0200 Subject: [PATCH] Parallel reduction is working. --- .../Algorithms/ArrayOperationsCuda.hpp | 6 +----- .../Containers/Algorithms/CudaReductionKernel.h | 7 ++----- src/UnitTests/Containers/ArrayOperationsTest.h | 16 ---------------- 3 files changed, 3 insertions(+), 26 deletions(-) diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp index 472eb414ab..a12b9c67fb 100644 --- a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp +++ b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp @@ -207,11 +207,7 @@ compareMemory( const Element1* destination, auto fetch = [=] __cuda_callable__ ( Index i ) { return ( destination[ i ] == source[ i ] ); }; auto reduction = [=] __cuda_callable__ ( const bool a, const bool b ) { return a && b; }; - return Reduction< Devices::Cuda >::reduce( - size, - reduction, //[=] __cuda_callable__ ( const bool a, const bool b ) { return a && b; }, - fetch, //[=] __cuda_callable__ ( Index i ) { return destination[ i ]; }, - true ); + return Reduction< Devices::Cuda >::reduce( size, reduction, fetch, true ); /*Algorithms::ParallelReductionEqualities< Element1, Element2 > reductionEqualities; return Reduction< Devices::Cuda >::reduce( reductionEqualities, size, destination, source );*/ diff --git a/src/TNL/Containers/Algorithms/CudaReductionKernel.h b/src/TNL/Containers/Algorithms/CudaReductionKernel.h index 21331defe1..8fea90f8f4 100644 --- a/src/TNL/Containers/Algorithms/CudaReductionKernel.h +++ b/src/TNL/Containers/Algorithms/CudaReductionKernel.h @@ -47,8 +47,8 @@ template< int blockSize, __global__ void __launch_bounds__( Reduction_maxThreadsPerBlock, Reduction_minBlocksPerMultiprocessor ) CudaReductionKernel( const Result zero, - const DataFetcher& dataFetcher, - const Reduction& reduction, + const DataFetcher dataFetcher, + const Reduction reduction, const Index size, Result* output ) { @@ -94,8 +94,6 @@ CudaReductionKernel( const Result zero, __syncthreads(); //printf( "1: tid %d data %f \n", tid, sdata[ tid ] ); - - //return; /*** * Perform the parallel reduction. */ @@ -127,7 +125,6 @@ CudaReductionKernel( const Result zero, //printf( "3: tid %d data %f \n", tid, sdata[ tid ] ); } - /*** * This runs in one warp so it is synchronized implicitly. */ diff --git a/src/UnitTests/Containers/ArrayOperationsTest.h b/src/UnitTests/Containers/ArrayOperationsTest.h index 21bfccdea6..23b8fcd4eb 100644 --- a/src/UnitTests/Containers/ArrayOperationsTest.h +++ b/src/UnitTests/Containers/ArrayOperationsTest.h @@ -445,22 +445,6 @@ TYPED_TEST( ArrayOperationsTest, containsOnlyValue_cuda ) #include "../GtestMissingError.h" int main( int argc, char* argv[] ) { - - using ValueType = double; - int size = 1000; - ValueType *hostData, *deviceData, *deviceData2; - ArrayOperations< Devices::Host >::allocateMemory( hostData, size ); - ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size ); - ArrayOperations< Devices::Cuda >::allocateMemory( deviceData2, size ); - - ArrayOperations< Devices::Host >::setMemory( hostData, (ValueType) 7, size ); - ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ValueType) 8, size ); - ArrayOperations< Devices::Cuda >::setMemory( deviceData2, (ValueType) 9, size ); - EXPECT_FALSE(( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< ValueType, ValueType >( hostData, deviceData, size ) )); - EXPECT_FALSE(( ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< ValueType, ValueType >( deviceData, hostData, size ) )); - EXPECT_FALSE(( ArrayOperations< Devices::Cuda >::compareMemory< ValueType, ValueType >( deviceData, deviceData2, size ) )); - - return 0; #ifdef HAVE_GTEST ::testing::InitGoogleTest( &argc, argv ); return RUN_ALL_TESTS(); -- GitLab