Skip to content
Snippets Groups Projects
Commit c9182447 authored by Tomáš Oberhuber, committed by Jakub Klinkovský
Browse files

Fixing traversers benchmark kernels.

parent be5a8002
No related branches found
No related tags found
1 merge request: !20 "Traversers optimizations"
......@@ -27,7 +27,7 @@ __global__ void fullGridTraverseKernel1D( const Index size, const dim3 gridIdx,
{
const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
if( threadIdx_x < size )
v_data[ threadIdx_x ] = 1.0;
v_data[ threadIdx_x ] += 1.0;
}
template< typename Real,
......@@ -37,7 +37,7 @@ __global__ void fullGridTraverseKernel2D( const Index size, const dim3 gridIdx,
const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
const Index threadIdx_y = ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
if( threadIdx_x < size && threadIdx_y < size )
v_data[ threadIdx_y * size + threadIdx_x ] = 1.0;
v_data[ threadIdx_y * size + threadIdx_x ] += 1.0;
}
template< typename Real,
......@@ -48,7 +48,7 @@ __global__ void fullGridTraverseKernel3D( const Index size, const dim3 gridIdx,
const Index threadIdx_y = ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
const Index threadIdx_z = ( gridIdx.z * Devices::Cuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
if( threadIdx_x < size && threadIdx_y < size && threadIdx_z < size )
v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] = 1.0;
v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] += 1.0;
}
/****
......@@ -60,7 +60,7 @@ __global__ void interiorTraverseKernel1D( const Index size, const dim3 gridIdx,
{
const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
if( threadIdx_x > 0 && threadIdx_x < size - 1 )
v_data[ threadIdx_x ] = 1.0;
v_data[ threadIdx_x ] += 1.0;
}
template< typename Real,
......@@ -71,7 +71,7 @@ __global__ void interiorTraverseKernel2D( const Index size, const dim3 gridIdx,
const Index threadIdx_y = ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
if( threadIdx_x > 0 && threadIdx_y > 0 &&
threadIdx_x < size - 1 && threadIdx_y < size - 1 )
v_data[ threadIdx_y * size + threadIdx_x ] = 1.0;
v_data[ threadIdx_y * size + threadIdx_x ] += 1.0;
}
template< typename Real,
......@@ -83,7 +83,7 @@ __global__ void interiorTraverseKernel3D( const Index size, const dim3 gridIdx,
const Index threadIdx_z = ( gridIdx.z * Devices::Cuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
if( threadIdx_x > 0 && threadIdx_y > 0 && threadIdx_z > 0 &&
threadIdx_x < size - 1 && threadIdx_y < size - 1 && threadIdx_z < size - 1 )
v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] = 1.0;
v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] += 1.0;
}
/****
......@@ -95,7 +95,7 @@ __global__ void boundariesTraverseKernel1D( const Index size, const dim3 gridIdx
{
const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
if( threadIdx_x == 0 || threadIdx_x == size - 1 )
v_data[ threadIdx_x ] = 2.0;
v_data[ threadIdx_x ] += 2.0;
}
template< typename Real,
......@@ -106,7 +106,7 @@ __global__ void boundariesTraverseKernel2D( const Index size, const dim3 gridIdx
const Index threadIdx_y = ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
if( threadIdx_x > 0 && threadIdx_y > 0 &&
threadIdx_x < size - 1 && threadIdx_y < size - 1 )
v_data[ threadIdx_y * size + threadIdx_x ] = 2.0;
v_data[ threadIdx_y * size + threadIdx_x ] += 2.0;
}
template< typename Real,
......@@ -118,7 +118,7 @@ __global__ void boundariesTraverseKernel3D( const Index size, const dim3 gridIdx
const Index threadIdx_z = ( gridIdx.z * Devices::Cuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
if( threadIdx_x == 0 || threadIdx_y == 0 || threadIdx_z == 0 ||
threadIdx_x == size - 1 || threadIdx_y == size - 1 || threadIdx_z == size - 1 )
v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] = 2.0;
v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] += 2.0;
}
#endif
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment