diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp index 0570f10f459a2a2250132197251f6b96d9697532..472eb414ab61be1b08d23bfa6bab0f946cfc8fe9 100644 --- a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp +++ b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp @@ -193,7 +193,6 @@ copySTLList( DestinationElement* destination, copiedElements += copySize; } } - template< typename Element1, typename Element2, typename Index > @@ -206,9 +205,7 @@ compareMemory( const Element1* destination, TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); - Element1* d; - cudaMalloc( ( void** ) &d, size * sizeof( Element1 ) ); - auto fetch = [=] __cuda_callable__ ( Index i ) { return d[ 0 ]; }; //( destination[ i ] == source[ i ] ); }; + auto fetch = [=] __cuda_callable__ ( Index i ) { return ( destination[ i ] == source[ i ] ); }; auto reduction = [=] __cuda_callable__ ( const bool a, const bool b ) { return a && b; }; return Reduction< Devices::Cuda >::reduce( size, diff --git a/src/TNL/Containers/Algorithms/CudaReductionKernel.h b/src/TNL/Containers/Algorithms/CudaReductionKernel.h index 685a3a33216b6eb5d5d88f68c55830e129b88d22..21331defe1b5758f73dd9aa05bb47aadd2198599 100644 --- a/src/TNL/Containers/Algorithms/CudaReductionKernel.h +++ b/src/TNL/Containers/Algorithms/CudaReductionKernel.h @@ -72,7 +72,7 @@ CudaReductionKernel( const Result zero, * Read data into the shared memory. We start with the * sequential reduction. */ - /*while( gid + 4 * gridSize < size ) + while( gid + 4 * gridSize < size ) { sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid ) ); sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid + gridSize ) ); @@ -85,14 +85,14 @@ CudaReductionKernel( const Result zero, sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid ) ); sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid + gridSize ) ); gid += 2 * gridSize; - }*/ + } while( gid < size ) { - sdata[ tid ] = dataFetcher( gid ); //reduction( sdata[ tid ], dataFetcher( gid ) ); + sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid ) ); gid += gridSize; } __syncthreads(); - return; + //printf( "1: tid %d data %f \n", tid, sdata[ tid ] ); //return; diff --git a/src/TNL/Containers/Algorithms/Reduction_impl.h b/src/TNL/Containers/Algorithms/Reduction_impl.h index b5fcabdc774b71c48855d2b881fe93db17495154..0fee746cf092cb7b326c5aff4e77024f4870a53c 100644 --- a/src/TNL/Containers/Algorithms/Reduction_impl.h +++ b/src/TNL/Containers/Algorithms/Reduction_impl.h @@ -110,9 +110,9 @@ Reduction< Devices::Cuda >:: * Transfer the reduced data from device to host. */ //ResultType* resultArray[ reducedSize ]; - //std::unique_ptr< ResultType[] > resultArray{ new ResultType[ reducedSize ] }; - ResultType* resultArray = new ResultType[ reducedSize ]; - ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray, deviceAux1, reducedSize ); + std::unique_ptr< ResultType[] > resultArray{ new ResultType[ reducedSize ] }; + //ResultType* resultArray = new ResultType[ reducedSize ]; + ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray.get(), deviceAux1, reducedSize ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -132,7 +132,7 @@ Reduction< Devices::Cuda >:: std::cout << " Reduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl; #endif - delete[] resultArray; + //delete[] resultArray; return result; } else { diff --git a/src/UnitTests/Containers/ArrayOperationsTest.h b/src/UnitTests/Containers/ArrayOperationsTest.h index 9a0d24039130a1750bc783a4f84ab958698f9f45..21bfccdea6536c6c4d7292096a7f7dbca60fc3b8 100644 --- a/src/UnitTests/Containers/ArrayOperationsTest.h +++ b/src/UnitTests/Containers/ArrayOperationsTest.h @@ -303,10 +303,9 @@ TYPED_TEST( ArrayOperationsTest, copyMemoryWithConversions_cuda ) ArrayOperations< Devices::Cuda >::freeMemory( deviceData2 ); } -//TYPED_TEST( ArrayOperationsTest, compareMemory_cuda ) -void Test() +TYPED_TEST( ArrayOperationsTest, compareMemory_cuda ) { - using ValueType = double;//typename TestFixture::ValueType; + using ValueType = typename TestFixture::ValueType; const int size = ARRAY_TEST_SIZE; ValueType *hostData, *deviceData, *deviceData2; @@ -446,7 +445,22 @@ TYPED_TEST( ArrayOperationsTest, containsOnlyValue_cuda ) #include "../GtestMissingError.h" int main( int argc, char* argv[] ) { - Test(); + + using ValueType = double; + int size = 1000; + ValueType *hostData, *deviceData, *deviceData2; + ArrayOperations< Devices::Host >::allocateMemory( hostData, size ); + ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size ); + ArrayOperations< Devices::Cuda >::allocateMemory( deviceData2, size ); + + ArrayOperations< Devices::Host >::setMemory( hostData, (ValueType) 7, size ); + ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ValueType) 8, size ); + ArrayOperations< Devices::Cuda >::setMemory( deviceData2, (ValueType) 9, size ); + EXPECT_FALSE(( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< ValueType, ValueType >( hostData, deviceData, size ) )); + EXPECT_FALSE(( ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< ValueType, ValueType >( deviceData, hostData, size ) )); + EXPECT_FALSE(( ArrayOperations< Devices::Cuda >::compareMemory< ValueType, ValueType >( deviceData, deviceData2, size ) )); + + return 0; #ifdef HAVE_GTEST ::testing::InitGoogleTest( &argc, argv ); return RUN_ALL_TESTS();