Skip to content
Snippets Groups Projects
Commit 7f7bff4c authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Traverser benchmarks: added explicit cast to Real

The floating-point literals 1.0 and 2.0 have type double, so they are now cast explicitly to Real before being added.
parent 57dc814c
No related branches found
No related tags found
1 merge request: !20 "Traversers optimizations"
......@@ -42,7 +42,7 @@ class WriteOneEntitiesProcessor
const GridEntity& entity )
{
auto& u = userData.u.template modifyData< DeviceType >();
u( entity ) += 1.0;
u( entity ) += (typename MeshType::RealType) 1.0;
}
};
......@@ -68,4 +68,4 @@ class GridTraversersBenchmark{};
#include "GridTraversersBenchmark_1D.h"
#include "GridTraversersBenchmark_2D.h"
#include "GridTraversersBenchmark_3D.h"
\ No newline at end of file
#include "GridTraversersBenchmark_3D.h"
......@@ -64,7 +64,7 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
if( std::is_same< Device, Devices::Host >::value )
{
for( int i = 0; i < size; i++ )
v_data[ i ] += 1.0;
v_data[ i ] += (Real) 1.0;
}
else // Device == Devices::Cuda
{
......@@ -94,7 +94,7 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
{
auto f = [] __cuda_callable__ ( Index i, Real* data )
{
data[ i ] += 1.0;
data[ i ] += (Real) 1.0;
};
ParallelFor< Device >::exec( ( Index ) 0, size, f, v.getData() );
}
......@@ -107,7 +107,7 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
Cell entity( *currentGrid );
entity.getCoordinates().x() = i;
entity.refresh();
data[ entity.getIndex() ] += 1.0;
data[ entity.getIndex() ] += (Real) 1.0;
};
ParallelFor< Device >::exec( ( Index ) 0, size, f, v.getData() );
}
......@@ -121,7 +121,7 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
Cell entity( *currentGrid );
entity.getCoordinates().x() = i;
entity.refresh();
( *_u )( entity ) += 1.0;
( *_u )( entity ) += (Real) 1.0;
//WriteOneEntitiesProcessorType::processEntity( *currentGrid, userData, entity );
};
ParallelFor< Device >::exec( ( Index ) 0, size, f );
......@@ -154,10 +154,10 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
{
if( std::is_same< Device, Devices::Host >::value )
{
v_data[ 0 ] = +2;
v_data[ 0 ] += (Real) 2;
for( int i = 1; i < size - 1; i++ )
v_data[ i ] = +1.0;
v_data[ size - 1 ] = +2;
v_data[ i ] += (Real) 1.0;
v_data[ size - 1 ] += (Real) 2;
}
else // Device == Devices::Cuda
{
......@@ -213,4 +213,4 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
} // namespace Traversers
} // namespace Benchmarks
} // namespace TNL
\ No newline at end of file
} // namespace TNL
......@@ -65,7 +65,7 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
{
for( int i = 0; i < size; i++ )
for( int j = 0; j < size; j++ )
v_data[ i * size + j ] += 1.0;
v_data[ i * size + j ] += (Real) 1.0;
}
else // Device == Devices::Cuda
{
......@@ -98,7 +98,7 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
Index _size = this->size;
auto f = [=] __cuda_callable__ ( Index i, Index j, Real* data )
{
data[ j * _size + i ] += 1.0;
data[ j * _size + i ] += (Real) 1.0;
};
ParallelFor2D< Device >::exec( ( Index ) 0,
......@@ -117,7 +117,7 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
entity.getCoordinates().x() = i;
entity.getCoordinates().y() = j;
entity.refresh();
data[ entity.getIndex() ] += 1.0;
data[ entity.getIndex() ] += (Real) 1.0;
};
ParallelFor2D< Device >::exec( ( Index ) 0,
......@@ -137,7 +137,7 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
entity.getCoordinates().x() = i;
entity.getCoordinates().y() = j;
entity.refresh();
( *_u )( entity ) += 1.0;
( *_u )( entity ) += (Real) 1.0;
};
ParallelFor2D< Device >::exec( ( Index ) 0,
......@@ -179,18 +179,18 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
{
for( int i = 0; i < size; i++ )
{
v_data[ i * size ] = 2.0;
v_data[ i * size + size - 1 ] = 2.0;
v_data[ i * size ] += (Real) 2.0;
v_data[ i * size + size - 1 ] += (Real) 2.0;
}
for( int j = 1; j < size - 1; j++ )
{
v_data[ j ] = 2.0;
v_data[ ( size - 1 ) * size + j ] = 2.0;
v_data[ j ] += (Real) 2.0;
v_data[ ( size - 1 ) * size + j ] += (Real) 2.0;
}
for( int i = 1; i < size - 1; i++ )
for( int j = 1; j < size - 1; j++ )
v_data[ i * size + j ] = 1.0;
v_data[ i * size + j ] += (Real) 1.0;
}
else // Device == Devices::Cuda
{
......
......@@ -69,7 +69,7 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
for( int i = 0; i < size; i++ )
for( int j = 0; j < size; j++ )
for( int k = 0; k < size; k++ )
v_data[ ( i * size + j ) * size + k ] += 1.0;
v_data[ ( i * size + j ) * size + k ] += (Real) 1.0;
}
else // Device == Devices::Cuda
{
......@@ -104,7 +104,7 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
Index _size = this->size;
auto f = [=] __cuda_callable__ ( Index i, Index j, Index k, Real* data )
{
data[ ( k * _size + j ) * _size + i ] += 1.0;
data[ ( k * _size + j ) * _size + i ] += (Real) 1.0;
};
ParallelFor3D< Device >::exec( ( Index ) 0,
......@@ -126,7 +126,7 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
entity.getCoordinates().y() = j;
entity.getCoordinates().z() = k;
entity.refresh();
data[ entity.getIndex() ] += 1.0;
data[ entity.getIndex() ] += (Real) 1.0;
};
ParallelFor3D< Device >::exec( ( Index ) 0,
......@@ -149,7 +149,7 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
entity.getCoordinates().y() = j;
entity.getCoordinates().z() = k;
entity.refresh();
( *_u )( entity ) += 1.0;
( *_u )( entity ) += (Real) 1.0;
};
ParallelFor3D< Device >::exec( ( Index ) 0,
......@@ -175,27 +175,27 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
for( int i = 0; i < size; i++ )
for( int j = 0; j < size; j++ )
{
v_data[ ( i * size + j ) * size ] = 2.0;
v_data[ ( i * size + j ) * size + size - 1 ] = 2.0;
v_data[ ( i * size + j ) * size ] += (Real) 2.0;
v_data[ ( i * size + j ) * size + size - 1 ] += (Real) 2.0;
}
for( int j = 0; j < size; j++ )
for( int k = 1; k < size - 1; k++ )
{
v_data[ j * size + k ] = 1.0;
v_data[ ( ( size - 1) * size + j ) * size + k ] = 1.0;
v_data[ j * size + k ] += (Real) 1.0;
v_data[ ( ( size - 1) * size + j ) * size + k ] += (Real) 1.0;
}
for( int i = 1; i < size -1; i++ )
for( int k = 1; k < size - 1; k++ )
{
v_data[ ( i * size ) * size + k ] = 2.0;
v_data[ ( i * size + size - 1 ) * size + k ] = 2.0;
v_data[ ( i * size ) * size + k ] += (Real) 2.0;
v_data[ ( i * size + size - 1 ) * size + k ] += (Real) 2.0;
}
for( int i = 1; i < size -1; i++ )
for( int j = 1; j < size -1; j++ )
for( int k = 1; k < size - 1; k++ )
v_data[ ( i * size + j ) * size + k ] = 1.0;
v_data[ ( i * size + j ) * size + k ] += (Real) 1.0;
}
else // Device == Devices::Cuda
{
......
......@@ -27,7 +27,7 @@ __global__ void fullGridTraverseKernel1D( const Index size, const dim3 gridIdx,
{
const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
if( threadIdx_x < size )
v_data[ threadIdx_x ] += 1.0;
v_data[ threadIdx_x ] += (Real) 1.0;
}
template< typename Real,
......@@ -37,7 +37,7 @@ __global__ void fullGridTraverseKernel2D( const Index size, const dim3 gridIdx,
const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
const Index threadIdx_y = ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
if( threadIdx_x < size && threadIdx_y < size )
v_data[ threadIdx_y * size + threadIdx_x ] += 1.0;
v_data[ threadIdx_y * size + threadIdx_x ] += (Real) 1.0;
}
template< typename Real,
......@@ -48,7 +48,7 @@ __global__ void fullGridTraverseKernel3D( const Index size, const dim3 gridIdx,
const Index threadIdx_y = ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
const Index threadIdx_z = ( gridIdx.z * Devices::Cuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
if( threadIdx_x < size && threadIdx_y < size && threadIdx_z < size )
v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] += 1.0;
v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] += (Real) 1.0;
}
/****
......@@ -60,7 +60,7 @@ __global__ void interiorTraverseKernel1D( const Index size, const dim3 gridIdx,
{
const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
if( threadIdx_x > 0 && threadIdx_x < size - 1 )
v_data[ threadIdx_x ] += 1.0;
v_data[ threadIdx_x ] += (Real) 1.0;
}
template< typename Real,
......@@ -71,7 +71,7 @@ __global__ void interiorTraverseKernel2D( const Index size, const dim3 gridIdx,
const Index threadIdx_y = ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
if( threadIdx_x > 0 && threadIdx_y > 0 &&
threadIdx_x < size - 1 && threadIdx_y < size - 1 )
v_data[ threadIdx_y * size + threadIdx_x ] += 1.0;
v_data[ threadIdx_y * size + threadIdx_x ] += (Real) 1.0;
}
template< typename Real,
......@@ -83,7 +83,7 @@ __global__ void interiorTraverseKernel3D( const Index size, const dim3 gridIdx,
const Index threadIdx_z = ( gridIdx.z * Devices::Cuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
if( threadIdx_x > 0 && threadIdx_y > 0 && threadIdx_z > 0 &&
threadIdx_x < size - 1 && threadIdx_y < size - 1 && threadIdx_z < size - 1 )
v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] += 1.0;
v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] += (Real) 1.0;
}
/****
......@@ -95,7 +95,7 @@ __global__ void boundariesTraverseKernel1D( const Index size, const dim3 gridIdx
{
const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
if( threadIdx_x == 0 || threadIdx_x == size - 1 )
v_data[ threadIdx_x ] += 2.0;
v_data[ threadIdx_x ] += (Real) 2.0;
}
template< typename Real,
......@@ -106,7 +106,7 @@ __global__ void boundariesTraverseKernel2D( const Index size, const dim3 gridIdx
const Index threadIdx_y = ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
if( threadIdx_x > 0 && threadIdx_y > 0 &&
threadIdx_x < size - 1 && threadIdx_y < size - 1 )
v_data[ threadIdx_y * size + threadIdx_x ] += 2.0;
v_data[ threadIdx_y * size + threadIdx_x ] += (Real) 2.0;
}
template< typename Real,
......@@ -118,7 +118,7 @@ __global__ void boundariesTraverseKernel3D( const Index size, const dim3 gridIdx
const Index threadIdx_z = ( gridIdx.z * Devices::Cuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
if( threadIdx_x == 0 || threadIdx_y == 0 || threadIdx_z == 0 ||
threadIdx_x == size - 1 || threadIdx_y == size - 1 || threadIdx_z == size - 1 )
v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] += 2.0;
v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] += (Real) 2.0;
}
#endif
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment