Skip to content
Snippets Groups Projects
Commit 444e01c4 authored by Matouš Fencl's avatar Matouš Fencl
Browse files

FIM method is now faster than chess method but some random error occurs.

parent f5c32276
No related branches found
No related tags found
2 merge requests!12Hamilton jacobi,!11Hamilton jacobi
...@@ -61,8 +61,9 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ...@@ -61,8 +61,9 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >
typedef Index IndexType; typedef Index IndexType;
typedef Functions::MeshFunction< MeshType > MeshFunctionType; typedef Functions::MeshFunction< MeshType > MeshFunctionType;
typedef Functions::MeshFunction< MeshType, 2, bool > InterfaceMapType; typedef Functions::MeshFunction< MeshType, 2, bool > InterfaceMapType;
typedef TNL::Containers::Array< int, Device, IndexType > ArrayContainer;
using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >;
using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >;
void initInterface( const MeshFunctionPointer& input, void initInterface( const MeshFunctionPointer& input,
MeshFunctionPointer& output, MeshFunctionPointer& output,
...@@ -76,6 +77,11 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ...@@ -76,6 +77,11 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >
__cuda_callable__ bool updateCell( volatile Real sArray[18][18], __cuda_callable__ bool updateCell( volatile Real sArray[18][18],
int thri, int thrj, const Real hx, const Real hy, int thri, int thrj, const Real hx, const Real hy,
const Real velocity = 1.0 ); const Real velocity = 1.0 );
void updateBlocks( InterfaceMapType interfaceMap,
MeshFunctionType aux,
ArrayContainer BlockIterHost, int numThreadsPerBlock );
void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY );
}; };
template< typename Real, template< typename Real,
...@@ -113,6 +119,8 @@ T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double ...@@ -113,6 +119,8 @@ T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double
template < typename T1 > template < typename T1 >
__cuda_callable__ void sortMinims( T1 pom[] ); __cuda_callable__ void sortMinims( T1 pom[] );
template < typename Index >
void GetNeighbours( TNL::Containers::Array< int, Devices::Host, Index > BlockIter, int numBlockX, int numBlockY );
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
template < typename Real, typename Device, typename Index > template < typename Real, typename Device, typename Index >
...@@ -130,11 +138,15 @@ template < typename Real, typename Device, typename Index > ...@@ -130,11 +138,15 @@ template < typename Real, typename Device, typename Index >
__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr,
const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux,
int *BlockIterDevice, int oddEvenBlock); TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int ne = 1 );
__global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks );
template < typename Index >
__global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice,
TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks );
/*template < typename Real, typename Device, typename Index > template < typename Index >
__global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a );*/ __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice,
/*TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom,*/ int numBlockX, int numBlockY );
template < typename Real, typename Device, typename Index > template < typename Real, typename Device, typename Index >
__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input,
...@@ -150,7 +162,7 @@ template < typename Real, typename Device, typename Index > ...@@ -150,7 +162,7 @@ template < typename Real, typename Device, typename Index >
__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr, __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr,
const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap,
Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux,
int *BlockIterDevice ); TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice );
#endif #endif
#include "tnlDirectEikonalMethodsBase_impl.h" #include "tnlDirectEikonalMethodsBase_impl.h"
...@@ -89,6 +89,199 @@ initInterface( const MeshFunctionPointer& _input, ...@@ -89,6 +89,199 @@ initInterface( const MeshFunctionPointer& _input,
} }
} }
template< typename Real,
typename Device,
typename Index >
void
tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >::
updateBlocks( InterfaceMapType interfaceMap,
MeshFunctionType aux,
ArrayContainer BlockIterHost, int numThreadsPerBlock )
{
for( int i = 0; i < BlockIterHost.getSize(); i++ )
{
if( BlockIterHost[ i ] )
{
MeshType mesh = interfaceMap.template getMesh< Devices::Host >();
int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y();
int numOfBlockx = dimY/numThreadsPerBlock + ((dimY%numThreadsPerBlock != 0) ? 1:0);
int numOfBlocky = dimX/numThreadsPerBlock + ((dimX%numThreadsPerBlock != 0) ? 1:0);
int xkolik = numThreadsPerBlock + 1;
int ykolik = numThreadsPerBlock + 1;
int blIdx = i%numOfBlockx;
int blIdy = i/numOfBlocky;
if( numOfBlockx - 1 == blIdx )
xkolik = dimX - (blIdx)*numThreadsPerBlock+1;
if( numOfBlocky -1 == blIdy )
ykolik = dimY - (blIdy)*numThreadsPerBlock+1;
/*bool changed[numThreadsPerBlock*numThreadsPerBlock];
changed[ 0 ] = 1;*/
Real hx = mesh.getSpaceSteps().x();
Real hy = mesh.getSpaceSteps().y();
Real changed1[ 16*16 ];
/*Real changed2[ 16*16 ];
Real changed3[ 16*16 ];
Real changed4[ 16*16 ];*/
Real sArray[18][18];
for( int thri = 0; thri < numThreadsPerBlock + 2; thri++ )
for( int thrj = 0; thrj < numThreadsPerBlock + 2; thrj++ )
sArray[thrj][thri] = std::numeric_limits< Real >::max();
BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 0;
for( int thrj = 0; thrj < numThreadsPerBlock + 1; thrj++ )
{
if( dimX > (blIdx+1) * numThreadsPerBlock && thrj+1 < ykolik )
sArray[thrj+1][xkolik] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX + xkolik ];
else
sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max();
if( blIdx != 0 && thrj+1 < ykolik )
sArray[thrj+1][0] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX ];
else
sArray[thrj+1][0] = std::numeric_limits< Real >::max();
if( dimY > (blIdy+1) * numThreadsPerBlock && thrj+1 < xkolik )
sArray[ykolik][thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + ykolik*dimX + thrj+1 ];
else
sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max();
if( blIdy != 0 && thrj+1 < xkolik )
sArray[0][thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + thrj+1 ];
else
sArray[0][thrj+1] = std::numeric_limits< Real >::max();
}
for( int k = 0; k < numThreadsPerBlock; k++ )
for( int l = 0; l < numThreadsPerBlock; l++ )
sArray[k+1][l+1] = aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ];
for( int k = 0; k < numThreadsPerBlock; k++ )
for( int l = 0; l < numThreadsPerBlock; l++ ){
changed1[ k*numThreadsPerBlock + l ] = 0;
/*changed2[ k*numThreadsPerBlock + l ] = 0;
changed3[ k*numThreadsPerBlock + l ] = 0;
changed4[ k*numThreadsPerBlock + l ] = 0;*/
if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY )
{
if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] )
{
changed1[ k*numThreadsPerBlock + l ] = this->updateCell( sArray, l+1, k+1, hx,hy);
}
}
}
for( int k = numThreadsPerBlock-1; k > -1; k-- )
for( int l = 0; l < numThreadsPerBlock; l++ ) {
if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY )
{
if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] )
{
/*changed2[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy);
}
}
}
for( int k = 0; k < numThreadsPerBlock; k++ )
for( int l = numThreadsPerBlock-1; l >-1; l-- ) {
if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY )
{
if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] )
{
/*changed3[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy);
}
}
}
for( int k = numThreadsPerBlock-1; k > -1; k-- )
for( int l = numThreadsPerBlock-1; l >-1; l-- ) {
if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY )
{
if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] )
{
/*changed4[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy);
}
}
}
for( int k = numThreadsPerBlock-1; k > -1; k-- )
for( int l = numThreadsPerBlock-1; l >-1; l-- ){
changed1[ 0 ] = changed1[ 0 ] || changed1[ k*numThreadsPerBlock + l ];
/*changed2[ 0 ] = changed2[ 0 ] || changed2[ k*numThreadsPerBlock + l ];
changed3[ 0 ] = changed3[ 0 ] || changed3[ k*numThreadsPerBlock + l ];
changed4[ 0 ] = changed4[ 0 ] || changed4[ k*numThreadsPerBlock + l ];*/
}
if( changed1[ 0 ] /*|| changed2[ 0 ] ||changed3[ 0 ] ||changed4[ 0 ]*/ )
BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 1;
for( int k = 0; k < numThreadsPerBlock; k++ ){
for( int l = 0; l < numThreadsPerBlock; l++ ) {
if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY &&
(!interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]) )
aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] = sArray[ k + 1 ][ l + 1 ];
//std::cout<< sArray[k+1][l+1];
}
//std::cout<<std::endl;
}
}
}
}
template< typename Real,
typename Device,
typename Index >
void
tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >::
getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY )
{
int* BlockIterPom;
BlockIterPom = new int [numBlockX * numBlockY];
for(int i = 0; i < numBlockX * numBlockY; i++)
{
BlockIterPom[ i ] = 0;
if( BlockIterHost[ i ] )
{
// i = k*numBlockY + m;
int m=0, k=0;
m = i%numBlockX;
k = i/numBlockX;
if( k > 0 )
BlockIterPom[i - numBlockX] = 1;
if( k < numBlockY - 1 )
BlockIterPom[i + numBlockX] = 1;
if( m < numBlockX - 1 )
BlockIterPom[ i+1 ] = 1;
if( m > 0 )
BlockIterPom[ i-1 ] = 1;
}
}
for(int i = 0; i < numBlockX * numBlockY; i++ )
//if( !BlockIter[ i ] )
BlockIterHost[ i ] = BlockIterPom[ i ];
/*else
BlockIter[ i ] = 0;*/
/*for( int i = numBlockX-1; i > -1; i-- )
{
for( int j = 0; j< numBlockY; j++ )
std::cout << BlockIterHost[ i*numBlockY + j ];
std::cout << std::endl;
}
std::cout << std::endl;*/
delete[] BlockIterPom;
}
template< typename Real, template< typename Real,
typename Device, typename Device,
typename Index > typename Index >
......
...@@ -88,7 +88,8 @@ class FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy > ...@@ -88,7 +88,8 @@ class FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >
using typename BaseType::InterfaceMapType; using typename BaseType::InterfaceMapType;
using typename BaseType::MeshFunctionType; using typename BaseType::MeshFunctionType;
using typename BaseType::InterfaceMapPointer; using typename BaseType::InterfaceMapPointer;
using typename BaseType::MeshFunctionPointer; using typename BaseType::MeshFunctionPointer;
using typename BaseType::ArrayContainer;
FastSweepingMethod(); FastSweepingMethod();
......
...@@ -258,13 +258,21 @@ solve( const MeshPointer& mesh, ...@@ -258,13 +258,21 @@ solve( const MeshPointer& mesh,
tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr; tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr;
int *BlockIterDevice;
int BlockIterD = 1; int BlockIterD = 1;
cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY * numBlocksZ ) * sizeof( int ) ); TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterDevice;
BlockIterDevice.setSize( numBlocksX * numBlocksY * numBlocksZ );
BlockIterDevice.setValue( 1 );
/*int *BlockIterDevice;
cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY * numBlocksZ ) * sizeof( int ) );*/
int nBlocks = ( numBlocksX * numBlocksY * numBlocksZ )/512 + ((( numBlocksX * numBlocksY * numBlocksZ )%512 != 0) ? 1:0); int nBlocks = ( numBlocksX * numBlocksY * numBlocksZ )/512 + ((( numBlocksX * numBlocksY * numBlocksZ )%512 != 0) ? 1:0);
int *dBlock;
cudaMalloc(&dBlock, nBlocks * sizeof( int ) ); TNL::Containers::Array< int, Devices::Cuda, IndexType > dBlock;
dBlock.setSize( nBlocks );
dBlock.setValue( 0 );
/*int *dBlock;
cudaMalloc(&dBlock, nBlocks * sizeof( int ) );*/
while( BlockIterD ) while( BlockIterD )
{ {
...@@ -272,17 +280,24 @@ solve( const MeshPointer& mesh, ...@@ -272,17 +280,24 @@ solve( const MeshPointer& mesh,
interfaceMapPtr.template getData< Device >(), interfaceMapPtr.template getData< Device >(),
auxPtr.template modifyData< Device>(), auxPtr.template modifyData< Device>(),
BlockIterDevice ); BlockIterDevice );
cudaDeviceSynchronize();
TNL_CHECK_CUDA_DEVICE;
CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) ); CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) );
CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); cudaDeviceSynchronize();
TNL_CHECK_CUDA_DEVICE;
CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks );
cudaDeviceSynchronize();
TNL_CHECK_CUDA_DEVICE;
cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost);
/*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) /*for( int i = 1; i < numBlocksX * numBlocksY; i++ )
BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/
} }
cudaFree( BlockIterDevice ); //cudaFree( BlockIterDevice );
cudaFree( dBlock ); //cudaFree( dBlock );
cudaDeviceSynchronize(); cudaDeviceSynchronize();
TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE;
aux = *auxPtr; aux = *auxPtr;
...@@ -302,7 +317,7 @@ template < typename Real, typename Device, typename Index > ...@@ -302,7 +317,7 @@ template < typename Real, typename Device, typename Index >
__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr, __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr,
const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap,
Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux,
int *BlockIterDevice ) TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice )
{ {
int thri = threadIdx.x; int thrj = threadIdx.y; int thrk = threadIdx.z; int thri = threadIdx.x; int thrj = threadIdx.y; int thrk = threadIdx.z;
int blIdx = blockIdx.x; int blIdy = blockIdx.y; int blIdz = blockIdx.z; int blIdx = blockIdx.x; int blIdy = blockIdx.y; int blIdz = blockIdx.z;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment