From 8ea590e97d436f869ed9fd0d79b288e62ce07aaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkjak@fjfi.cvut.cz> Date: Fri, 4 Jan 2019 23:05:34 +0100 Subject: [PATCH] Traverser benchmarks: added explicit cast to Real, because the constants 1.0 and 2.0 have type double. Note: several host-side loops in the 1D/2D/3D benchmarks are also changed from assignment (=) to accumulation (+=). --- .../Traversers/GridTraversersBenchmark.h | 4 ++-- .../Traversers/GridTraversersBenchmark_1D.h | 16 +++++++------- .../Traversers/GridTraversersBenchmark_2D.h | 18 +++++++-------- .../Traversers/GridTraversersBenchmark_3D.h | 22 +++++++++---------- src/Benchmarks/Traversers/cuda-kernels.h | 18 +++++++-------- 5 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark.h b/src/Benchmarks/Traversers/GridTraversersBenchmark.h index c320dc591e..bd748ed097 100644 --- a/src/Benchmarks/Traversers/GridTraversersBenchmark.h +++ b/src/Benchmarks/Traversers/GridTraversersBenchmark.h @@ -42,7 +42,7 @@ class WriteOneEntitiesProcessor const GridEntity& entity ) { auto& u = userData.u.template modifyData< DeviceType >(); - u( entity ) += 1.0; + u( entity ) += (typename MeshType::RealType) 1.0; } }; @@ -68,4 +68,4 @@ class GridTraversersBenchmark{}; #include "GridTraversersBenchmark_1D.h" #include "GridTraversersBenchmark_2D.h" -#include "GridTraversersBenchmark_3D.h" \ No newline at end of file +#include "GridTraversersBenchmark_3D.h" diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h b/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h index 1683cc8688..e626b17e35 100644 --- a/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h +++ b/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h @@ -64,7 +64,7 @@ class GridTraversersBenchmark< 1, Device, Real, Index > if( std::is_same< Device, Devices::Host >::value ) { for( int i = 0; i < size; i++ ) - v_data[ i ] += 1.0; + v_data[ i ] += (Real) 1.0; } else // Device == Devices::Cuda { @@ -94,7 +94,7 @@ class GridTraversersBenchmark< 1, Device, Real, Index > { auto f = [] 
__cuda_callable__ ( Index i, Real* data ) { - data[ i ] += 1.0; + data[ i ] += (Real) 1.0; }; ParallelFor< Device >::exec( ( Index ) 0, size, f, v.getData() ); } @@ -107,7 +107,7 @@ class GridTraversersBenchmark< 1, Device, Real, Index > Cell entity( *currentGrid ); entity.getCoordinates().x() = i; entity.refresh(); - data[ entity.getIndex() ] += 1.0; + data[ entity.getIndex() ] += (Real) 1.0; }; ParallelFor< Device >::exec( ( Index ) 0, size, f, v.getData() ); } @@ -121,7 +121,7 @@ class GridTraversersBenchmark< 1, Device, Real, Index > Cell entity( *currentGrid ); entity.getCoordinates().x() = i; entity.refresh(); - ( *_u )( entity ) += 1.0; + ( *_u )( entity ) += (Real) 1.0; //WriteOneEntitiesProcessorType::processEntity( *currentGrid, userData, entity ); }; ParallelFor< Device >::exec( ( Index ) 0, size, f ); @@ -154,10 +154,10 @@ class GridTraversersBenchmark< 1, Device, Real, Index > { if( std::is_same< Device, Devices::Host >::value ) { - v_data[ 0 ] = +2; + v_data[ 0 ] += (Real) 2; for( int i = 1; i < size - 1; i++ ) - v_data[ i ] = +1.0; - v_data[ size - 1 ] = +2; + v_data[ i ] += (Real) 1.0; + v_data[ size - 1 ] += (Real) 2; } else // Device == Devices::Cuda { @@ -213,4 +213,4 @@ class GridTraversersBenchmark< 1, Device, Real, Index > } // namespace Traversers } // namespace Benchmarks -} // namespace TNL \ No newline at end of file +} // namespace TNL diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h b/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h index 48f11bfb91..1296a9a463 100644 --- a/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h +++ b/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h @@ -65,7 +65,7 @@ class GridTraversersBenchmark< 2, Device, Real, Index > { for( int i = 0; i < size; i++ ) for( int j = 0; j < size; j++ ) - v_data[ i * size + j ] += 1.0; + v_data[ i * size + j ] += (Real) 1.0; } else // Device == Devices::Cuda { @@ -98,7 +98,7 @@ class GridTraversersBenchmark< 2, Device, Real, Index > Index 
_size = this->size; auto f = [=] __cuda_callable__ ( Index i, Index j, Real* data ) { - data[ j * _size + i ] += 1.0; + data[ j * _size + i ] += (Real) 1.0; }; ParallelFor2D< Device >::exec( ( Index ) 0, @@ -117,7 +117,7 @@ class GridTraversersBenchmark< 2, Device, Real, Index > entity.getCoordinates().x() = i; entity.getCoordinates().y() = j; entity.refresh(); - data[ entity.getIndex() ] += 1.0; + data[ entity.getIndex() ] += (Real) 1.0; }; ParallelFor2D< Device >::exec( ( Index ) 0, @@ -137,7 +137,7 @@ class GridTraversersBenchmark< 2, Device, Real, Index > entity.getCoordinates().x() = i; entity.getCoordinates().y() = j; entity.refresh(); - ( *_u )( entity ) += 1.0; + ( *_u )( entity ) += (Real) 1.0; }; ParallelFor2D< Device >::exec( ( Index ) 0, @@ -179,18 +179,18 @@ class GridTraversersBenchmark< 2, Device, Real, Index > { for( int i = 0; i < size; i++ ) { - v_data[ i * size ] = 2.0; - v_data[ i * size + size - 1 ] = 2.0; + v_data[ i * size ] += (Real) 2.0; + v_data[ i * size + size - 1 ] += (Real) 2.0; } for( int j = 1; j < size - 1; j++ ) { - v_data[ j ] = 2.0; - v_data[ ( size - 1 ) * size + j ] = 2.0; + v_data[ j ] += (Real) 2.0; + v_data[ ( size - 1 ) * size + j ] += (Real) 2.0; } for( int i = 1; i < size - 1; i++ ) for( int j = 1; j < size - 1; j++ ) - v_data[ i * size + j ] = 1.0; + v_data[ i * size + j ] += (Real) 1.0; } else // Device == Devices::Cuda { diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h b/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h index cceffa328d..35863a3c96 100644 --- a/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h +++ b/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h @@ -69,7 +69,7 @@ class GridTraversersBenchmark< 3, Device, Real, Index > for( int i = 0; i < size; i++ ) for( int j = 0; j < size; j++ ) for( int k = 0; k < size; k++ ) - v_data[ ( i * size + j ) * size + k ] += 1.0; + v_data[ ( i * size + j ) * size + k ] += (Real) 1.0; } else // Device == Devices::Cuda { @@ -104,7 +104,7 
@@ class GridTraversersBenchmark< 3, Device, Real, Index > Index _size = this->size; auto f = [=] __cuda_callable__ ( Index i, Index j, Index k, Real* data ) { - data[ ( k * _size + j ) * _size + i ] += 1.0; + data[ ( k * _size + j ) * _size + i ] += (Real) 1.0; }; ParallelFor3D< Device >::exec( ( Index ) 0, @@ -126,7 +126,7 @@ class GridTraversersBenchmark< 3, Device, Real, Index > entity.getCoordinates().y() = j; entity.getCoordinates().z() = k; entity.refresh(); - data[ entity.getIndex() ] += 1.0; + data[ entity.getIndex() ] += (Real) 1.0; }; ParallelFor3D< Device >::exec( ( Index ) 0, @@ -149,7 +149,7 @@ class GridTraversersBenchmark< 3, Device, Real, Index > entity.getCoordinates().y() = j; entity.getCoordinates().z() = k; entity.refresh(); - ( *_u )( entity ) += 1.0; + ( *_u )( entity ) += (Real) 1.0; }; ParallelFor3D< Device >::exec( ( Index ) 0, @@ -175,27 +175,27 @@ class GridTraversersBenchmark< 3, Device, Real, Index > for( int i = 0; i < size; i++ ) for( int j = 0; j < size; j++ ) { - v_data[ ( i * size + j ) * size ] = 2.0; - v_data[ ( i * size + j ) * size + size - 1 ] = 2.0; + v_data[ ( i * size + j ) * size ] += (Real) 2.0; + v_data[ ( i * size + j ) * size + size - 1 ] += (Real) 2.0; } for( int j = 0; j < size; j++ ) for( int k = 1; k < size - 1; k++ ) { - v_data[ j * size + k ] = 1.0; - v_data[ ( ( size - 1) * size + j ) * size + k ] = 1.0; + v_data[ j * size + k ] += (Real) 1.0; + v_data[ ( ( size - 1) * size + j ) * size + k ] += (Real) 1.0; } for( int i = 1; i < size -1; i++ ) for( int k = 1; k < size - 1; k++ ) { - v_data[ ( i * size ) * size + k ] = 2.0; - v_data[ ( i * size + size - 1 ) * size + k ] = 2.0; + v_data[ ( i * size ) * size + k ] += (Real) 2.0; + v_data[ ( i * size + size - 1 ) * size + k ] += (Real) 2.0; } for( int i = 1; i < size -1; i++ ) for( int j = 1; j < size -1; j++ ) for( int k = 1; k < size - 1; k++ ) - v_data[ ( i * size + j ) * size + k ] = 1.0; + v_data[ ( i * size + j ) * size + k ] += (Real) 1.0; } else // Device 
== Devices::Cuda { diff --git a/src/Benchmarks/Traversers/cuda-kernels.h b/src/Benchmarks/Traversers/cuda-kernels.h index 2802b73eba..a90baf5b02 100644 --- a/src/Benchmarks/Traversers/cuda-kernels.h +++ b/src/Benchmarks/Traversers/cuda-kernels.h @@ -27,7 +27,7 @@ __global__ void fullGridTraverseKernel1D( const Index size, const dim3 gridIdx, { const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( threadIdx_x < size ) - v_data[ threadIdx_x ] += 1.0; + v_data[ threadIdx_x ] += (Real) 1.0; } template< typename Real, @@ -37,7 +37,7 @@ __global__ void fullGridTraverseKernel2D( const Index size, const dim3 gridIdx, const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; const Index threadIdx_y = ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; if( threadIdx_x < size && threadIdx_y < size ) - v_data[ threadIdx_y * size + threadIdx_x ] += 1.0; + v_data[ threadIdx_y * size + threadIdx_x ] += (Real) 1.0; } template< typename Real, @@ -48,7 +48,7 @@ __global__ void fullGridTraverseKernel3D( const Index size, const dim3 gridIdx, const Index threadIdx_y = ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; const Index threadIdx_z = ( gridIdx.z * Devices::Cuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z; if( threadIdx_x < size && threadIdx_y < size && threadIdx_z < size ) - v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] += 1.0; + v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] += (Real) 1.0; } /**** @@ -60,7 +60,7 @@ __global__ void interiorTraverseKernel1D( const Index size, const dim3 gridIdx, { const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( threadIdx_x > 0 && threadIdx_x < size - 1 ) - v_data[ threadIdx_x ] += 1.0; + v_data[ threadIdx_x ] += (Real) 
1.0; } template< typename Real, @@ -71,7 +71,7 @@ __global__ void interiorTraverseKernel2D( const Index size, const dim3 gridIdx, const Index threadIdx_y = ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; if( threadIdx_x > 0 && threadIdx_y > 0 && threadIdx_x < size - 1 && threadIdx_y < size - 1 ) - v_data[ threadIdx_y * size + threadIdx_x ] += 1.0; + v_data[ threadIdx_y * size + threadIdx_x ] += (Real) 1.0; } template< typename Real, @@ -83,7 +83,7 @@ __global__ void interiorTraverseKernel3D( const Index size, const dim3 gridIdx, const Index threadIdx_z = ( gridIdx.z * Devices::Cuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z; if( threadIdx_x > 0 && threadIdx_y > 0 && threadIdx_z > 0 && threadIdx_x < size - 1 && threadIdx_y < size - 1 && threadIdx_z < size - 1 ) - v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] += 1.0; + v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] += (Real) 1.0; } /**** @@ -95,7 +95,7 @@ __global__ void boundariesTraverseKernel1D( const Index size, const dim3 gridIdx { const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( threadIdx_x == 0 || threadIdx_x == size - 1 ) - v_data[ threadIdx_x ] += 2.0; + v_data[ threadIdx_x ] += (Real) 2.0; } template< typename Real, @@ -106,7 +106,7 @@ __global__ void boundariesTraverseKernel2D( const Index size, const dim3 gridIdx const Index threadIdx_y = ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; if( threadIdx_x > 0 && threadIdx_y > 0 && threadIdx_x < size - 1 && threadIdx_y < size - 1 ) - v_data[ threadIdx_y * size + threadIdx_x ] += 2.0; + v_data[ threadIdx_y * size + threadIdx_x ] += (Real) 2.0; } template< typename Real, @@ -118,7 +118,7 @@ __global__ void boundariesTraverseKernel3D( const Index size, const dim3 gridIdx const Index threadIdx_z = ( gridIdx.z * Devices::Cuda::getMaxGridSize() + blockIdx.z 
) * blockDim.z + threadIdx.z; if( threadIdx_x == 0 || threadIdx_y == 0 || threadIdx_z == 0 || threadIdx_x == size - 1 || threadIdx_y == size - 1 || threadIdx_z == size - 1 ) - v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] += 2.0; + v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] += (Real) 2.0; } #endif -- GitLab