Commit bf5e6fd6 authored by Matouš Fencl's avatar Matouš Fencl Committed by Tomáš Oberhuber
Browse files

Chess model implemented in 2D.

parent a13675de
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -129,12 +129,12 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
template < typename Real, typename Device, typename Index >
__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr,
                                      const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
                                      Real *aux,
                                      int *BlockIterDevice);
                                      Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux,
                                      int *BlockIterDevice, int oddEvenBlock);
__global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks );

template < typename Real, typename Device, typename Index >
__global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a );
/*template < typename Real, typename Device, typename Index >
__global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a );*/

template < typename Real, typename Device, typename Index >
__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, 
+6 −6
Original line number Diff line number Diff line
@@ -945,7 +945,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con
    {
        sArray[ thrj ][ thri ] = argAbsMin( value, tmp );
        tmp = value - sArray[ thrj ][ thri ];
        if ( fabs( tmp ) >  0.01*hx )
        if ( fabs( tmp ) >  0.001*hx )
            return true;
        else
            return false;
@@ -957,7 +957,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con
            ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] );
        sArray[ thrj ][ thri ] = argAbsMin( value, tmp );
        tmp = value - sArray[ thrj ][ thri ];
        if ( fabs( tmp ) > 0.01*hx )
        if ( fabs( tmp ) > 0.001*hx )
            return true;
        else
            return false;
@@ -989,7 +989,7 @@ updateCell( volatile Real sArray[18], int thri, const Real h, const Real v )
    sArray[ thri ] = argAbsMin( value, tmp );
    
    tmp = value - sArray[ thri ];
    if ( fabs( tmp ) >  0.01*h )
    if ( fabs( tmp ) >  0.001*h )
        return true;
    else
        return false;
@@ -1032,7 +1032,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk,
    {
        sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp );
        tmp = value - sArray[ thrk ][ thrj ][ thri ];
        if ( fabs( tmp ) >  0.01*hx )
        if ( fabs( tmp ) >  0.001*hx )
            return true;
        else
            return false;
@@ -1046,7 +1046,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk,
        {
            sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp );
            tmp = value - sArray[ thrk ][ thrj ][ thri ];
            if ( fabs( tmp ) > 0.01*hx )
            if ( fabs( tmp ) > 0.001*hx )
                return true;
            else
                return false;
@@ -1059,7 +1059,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk,
                hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx );
            sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp );
            tmp = value - sArray[ thrk ][ thrj ][ thri ];
            if ( fabs( tmp ) > 0.01*hx )
            if ( fabs( tmp ) > 0.001*hx )
                return true;
            else
                return false;
+114 −98
Original line number Diff line number Diff line
@@ -26,7 +26,7 @@ template< typename Real,
          typename Anisotropy >
FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >::
FastSweepingMethod()
: maxIterations( 100 )
: maxIterations( 1 )
{
   
}
@@ -250,7 +250,7 @@ solve( const MeshPointer& mesh,
          
          tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr;
          
          aux1<<< gridSize, blockSize >>>( auxPtr.template modifyData< Device>(), dAux,1 );
          //aux1<<< gridSize, blockSize >>>( auxPtr.template modifyData< Device>(), dAux,1 );
          
          //int BlockIter = 1;// = (bool*)malloc( ( numBlocksX * numBlocksY ) * sizeof( bool ) );

@@ -261,7 +261,7 @@ solve( const MeshPointer& mesh,
          int nBlocks = ( numBlocksX * numBlocksY )/512 + ((( numBlocksX * numBlocksY )%512 != 0) ? 1:0);
          int *dBlock;
          cudaMalloc(&dBlock, nBlocks * sizeof( int ) );
          
          int oddEvenBlock = 0;
          while( BlockIterD )
          {
           /*for( int i = 0; i < numBlocksX * numBlocksY; i++ )
@@ -269,19 +269,30 @@ solve( const MeshPointer& mesh,
                       
            CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr,
                                                             interfaceMapPtr.template getData< Device >(),
                                                             dAux,
                                                             BlockIterDevice );
                                                             auxPtr.template modifyData< Device>(),
                                                             BlockIterDevice,
                                                             oddEvenBlock );
	    TNL_CHECK_CUDA_DEVICE;
            oddEvenBlock= (oddEvenBlock == 0) ? 1: 0;
            CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr,
                                                             interfaceMapPtr.template getData< Device >(),
                                                             auxPtr.template modifyData< Device>(),
                                                             BlockIterDevice,
                                                             oddEvenBlock );
	    TNL_CHECK_CUDA_DEVICE;
            oddEvenBlock= (oddEvenBlock == 0) ? 1: 0;
            
            CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) );
	    TNL_CHECK_CUDA_DEVICE;
            CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks );
            
            TNL_CHECK_CUDA_DEVICE;
            cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost);
                                   
            /*for( int i = 1; i < numBlocksX * numBlocksY; i++ )
                BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/
            
          }
          aux1<<<gridSize,blockSize>>>( auxPtr.template modifyData< Device>(), dAux, 0 );
          //aux1<<<gridSize,blockSize>>>( auxPtr.template modifyData< Device>(), dAux, 0 );
          cudaFree( dAux );
          cudaFree( BlockIterDevice );
          cudaFree( dBlock );
@@ -299,7 +310,7 @@ solve( const MeshPointer& mesh,
}

#ifdef HAVE_CUDA
template < typename Real, typename Device, typename Index >
/*template < typename Real, typename Device, typename Index >
__global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a )
{
    int i = threadIdx.x + blockDim.x*blockIdx.x;
@@ -314,7 +325,7 @@ __global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, In
        aux[ j*mesh.getDimensions().x() + i ] = dAux[ j*mesh.getDimensions().x() + i ];
    }
    
}
}*/

__global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks )
{
@@ -366,8 +377,8 @@ __global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlock
template < typename Real, typename Device, typename Index >
__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr,
                                      const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
                                      Real *aux,
                                      int *BlockIterDevice )
                                      Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux,
                                      int *BlockIterDevice, int oddEvenBlock )
{
    int thri = threadIdx.x; int thrj = threadIdx.y;
    int blIdx = blockIdx.x; int blIdy = blockIdx.y;
@@ -417,6 +428,9 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
    }
    __syncthreads();
    
    if( (blIdy%2  + blIdx) % 2 == oddEvenBlock )
    {
    
        if( thri == 0 )
        {        
            if( dimX > (blIdx+1) * blockDim.x  && thrj+1 < ykolik )
@@ -521,5 +535,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
        
        if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) )
            aux[ j * mesh.getDimensions().x() + i ] = sArray[ thrj + 1 ][ thri + 1 ];

    }
}
#endif