Loading src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp +1 −4 Original line number Diff line number Diff line Loading @@ -193,7 +193,6 @@ copySTLList( DestinationElement* destination, copiedElements += copySize; } } template< typename Element1, typename Element2, typename Index > Loading @@ -206,9 +205,7 @@ compareMemory( const Element1* destination, TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); Element1* d; cudaMalloc( ( void** ) &d, size * sizeof( Element1 ) ); auto fetch = [=] __cuda_callable__ ( Index i ) { return d[ 0 ]; }; //( destination[ i ] == source[ i ] ); }; auto fetch = [=] __cuda_callable__ ( Index i ) { return ( destination[ i ] == source[ i ] ); }; auto reduction = [=] __cuda_callable__ ( const bool a, const bool b ) { return a && b; }; return Reduction< Devices::Cuda >::reduce( size, Loading src/TNL/Containers/Algorithms/CudaReductionKernel.h +4 −4 Original line number Diff line number Diff line Loading @@ -72,7 +72,7 @@ CudaReductionKernel( const Result zero, * Read data into the shared memory. We start with the * sequential reduction. */ /*while( gid + 4 * gridSize < size ) while( gid + 4 * gridSize < size ) { sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid ) ); sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid + gridSize ) ); Loading @@ -85,14 +85,14 @@ CudaReductionKernel( const Result zero, sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid ) ); sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid + gridSize ) ); gid += 2 * gridSize; }*/ } while( gid < size ) { sdata[ tid ] = dataFetcher( gid ); //reduction( sdata[ tid ], dataFetcher( gid ) ); sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid ) ); gid += gridSize; } __syncthreads(); return; //printf( "1: tid %d data %f \n", tid, sdata[ tid ] ); //return; Loading src/TNL/Containers/Algorithms/Reduction_impl.h +4 −4 Original line number Diff line number Diff line Loading @@ -110,9 +110,9 @@ Reduction< Devices::Cuda >:: * Transfer the reduced data from device to host. */ //ResultType* resultArray[ reducedSize ]; //std::unique_ptr< ResultType[] > resultArray{ new ResultType[ reducedSize ] }; ResultType* resultArray = new ResultType[ reducedSize ]; ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray, deviceAux1, reducedSize ); std::unique_ptr< ResultType[] > resultArray{ new ResultType[ reducedSize ] }; //ResultType* resultArray = new ResultType[ reducedSize ]; ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray.get(), deviceAux1, reducedSize ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); Loading @@ -132,7 +132,7 @@ Reduction< Devices::Cuda >:: std::cout << " Reduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl; #endif delete[] resultArray; //delete[] resultArray; return result; } else { Loading src/UnitTests/Containers/ArrayOperationsTest.h +18 −4 Original line number Diff line number Diff line Loading @@ -303,10 +303,9 @@ TYPED_TEST( ArrayOperationsTest, copyMemoryWithConversions_cuda ) ArrayOperations< Devices::Cuda >::freeMemory( deviceData2 ); } //TYPED_TEST( ArrayOperationsTest, compareMemory_cuda ) void Test() TYPED_TEST( ArrayOperationsTest, compareMemory_cuda ) { using ValueType = double;//typename TestFixture::ValueType; using ValueType = typename TestFixture::ValueType; const int size = ARRAY_TEST_SIZE; ValueType *hostData, *deviceData, *deviceData2; Loading Loading @@ -446,7 +445,22 @@ TYPED_TEST( ArrayOperationsTest, containsOnlyValue_cuda ) #include "../GtestMissingError.h" int main( int argc, char* argv[] ) { Test(); using ValueType = double; int size = 1000; ValueType *hostData, *deviceData, *deviceData2; ArrayOperations< Devices::Host >::allocateMemory( hostData, size ); ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size ); ArrayOperations< Devices::Cuda >::allocateMemory( deviceData2, size ); ArrayOperations< Devices::Host >::setMemory( hostData, (ValueType) 7, size ); ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ValueType) 8, size ); ArrayOperations< Devices::Cuda >::setMemory( deviceData2, (ValueType) 9, size ); EXPECT_FALSE(( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< ValueType, ValueType >( hostData, deviceData, size ) )); EXPECT_FALSE(( ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< ValueType, ValueType >( deviceData, hostData, size ) )); EXPECT_FALSE(( ArrayOperations< Devices::Cuda >::compareMemory< ValueType, ValueType >( deviceData, deviceData2, size ) )); return 0; #ifdef HAVE_GTEST ::testing::InitGoogleTest( &argc, argv ); return RUN_ALL_TESTS(); Loading Loading
src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp +1 −4 Original line number Diff line number Diff line Loading @@ -193,7 +193,6 @@ copySTLList( DestinationElement* destination, copiedElements += copySize; } } template< typename Element1, typename Element2, typename Index > Loading @@ -206,9 +205,7 @@ compareMemory( const Element1* destination, TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); Element1* d; cudaMalloc( ( void** ) &d, size * sizeof( Element1 ) ); auto fetch = [=] __cuda_callable__ ( Index i ) { return d[ 0 ]; }; //( destination[ i ] == source[ i ] ); }; auto fetch = [=] __cuda_callable__ ( Index i ) { return ( destination[ i ] == source[ i ] ); }; auto reduction = [=] __cuda_callable__ ( const bool a, const bool b ) { return a && b; }; return Reduction< Devices::Cuda >::reduce( size, Loading
src/TNL/Containers/Algorithms/CudaReductionKernel.h +4 −4 Original line number Diff line number Diff line Loading @@ -72,7 +72,7 @@ CudaReductionKernel( const Result zero, * Read data into the shared memory. We start with the * sequential reduction. */ /*while( gid + 4 * gridSize < size ) while( gid + 4 * gridSize < size ) { sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid ) ); sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid + gridSize ) ); Loading @@ -85,14 +85,14 @@ CudaReductionKernel( const Result zero, sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid ) ); sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid + gridSize ) ); gid += 2 * gridSize; }*/ } while( gid < size ) { sdata[ tid ] = dataFetcher( gid ); //reduction( sdata[ tid ], dataFetcher( gid ) ); sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid ) ); gid += gridSize; } __syncthreads(); return; //printf( "1: tid %d data %f \n", tid, sdata[ tid ] ); //return; Loading
src/TNL/Containers/Algorithms/Reduction_impl.h +4 −4 Original line number Diff line number Diff line Loading @@ -110,9 +110,9 @@ Reduction< Devices::Cuda >:: * Transfer the reduced data from device to host. */ //ResultType* resultArray[ reducedSize ]; //std::unique_ptr< ResultType[] > resultArray{ new ResultType[ reducedSize ] }; ResultType* resultArray = new ResultType[ reducedSize ]; ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray, deviceAux1, reducedSize ); std::unique_ptr< ResultType[] > resultArray{ new ResultType[ reducedSize ] }; //ResultType* resultArray = new ResultType[ reducedSize ]; ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray.get(), deviceAux1, reducedSize ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); Loading @@ -132,7 +132,7 @@ Reduction< Devices::Cuda >:: std::cout << " Reduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl; #endif delete[] resultArray; //delete[] resultArray; return result; } else { Loading
src/UnitTests/Containers/ArrayOperationsTest.h +18 −4 Original line number Diff line number Diff line Loading @@ -303,10 +303,9 @@ TYPED_TEST( ArrayOperationsTest, copyMemoryWithConversions_cuda ) ArrayOperations< Devices::Cuda >::freeMemory( deviceData2 ); } //TYPED_TEST( ArrayOperationsTest, compareMemory_cuda ) void Test() TYPED_TEST( ArrayOperationsTest, compareMemory_cuda ) { using ValueType = double;//typename TestFixture::ValueType; using ValueType = typename TestFixture::ValueType; const int size = ARRAY_TEST_SIZE; ValueType *hostData, *deviceData, *deviceData2; Loading Loading @@ -446,7 +445,22 @@ TYPED_TEST( ArrayOperationsTest, containsOnlyValue_cuda ) #include "../GtestMissingError.h" int main( int argc, char* argv[] ) { Test(); using ValueType = double; int size = 1000; ValueType *hostData, *deviceData, *deviceData2; ArrayOperations< Devices::Host >::allocateMemory( hostData, size ); ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size ); ArrayOperations< Devices::Cuda >::allocateMemory( deviceData2, size ); ArrayOperations< Devices::Host >::setMemory( hostData, (ValueType) 7, size ); ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ValueType) 8, size ); ArrayOperations< Devices::Cuda >::setMemory( deviceData2, (ValueType) 9, size ); EXPECT_FALSE(( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< ValueType, ValueType >( hostData, deviceData, size ) )); EXPECT_FALSE(( ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< ValueType, ValueType >( deviceData, hostData, size ) )); EXPECT_FALSE(( ArrayOperations< Devices::Cuda >::compareMemory< ValueType, ValueType >( deviceData, deviceData2, size ) )); return 0; #ifdef HAVE_GTEST ::testing::InitGoogleTest( &argc, argv ); return RUN_ALL_TESTS(); Loading