Commit 092c2538 authored by Matouš Fencl's avatar Matouš Fencl
Browse files

Cuda init start

parent c04814b8
Loading
Loading
Loading
Loading
+13 −2
Original line number Diff line number Diff line
@@ -49,7 +49,6 @@ template< typename Real,
class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >
{
   public:
      
      typedef Meshes::Grid< 2, Real, Device, Index > MeshType;
      typedef Real RealType;
      typedef Device DevcieType;
@@ -65,6 +64,8 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >
      __cuda_callable__ void updateCell( MeshFunctionType& u,
                                         const MeshEntity& cell,
                                         const RealType velocity = 1.0 );
   protected:
       
};

template< typename Real,
@@ -100,6 +101,16 @@ template < typename T1, typename T2 >
T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double v = 1);

template < typename T1 >
void sortMinims( T1 pom[] );
__cuda_callable__ void sortMinims( T1 pom[] );


template < typename Real, typename Device, typename Index >
__global__ void CudaUpdateCellCaller( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
                                      Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux );

template < typename Real, typename Device, typename Index >
__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, 
                                Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output,
                                Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap );

#include "tnlDirectEikonalMethodsBase_impl.h"
+185 −90
Original line number Diff line number Diff line
@@ -8,7 +8,9 @@
#pragma once

#include <TNL/TypeInfo.h>

#include <iostream>
#include "tnlFastSweepingMethod.h"
template< typename Real,
          typename Device,
          typename Index >
@@ -67,7 +69,6 @@ updateCell( MeshFunctionType& u,
{
}


template< typename Real,
          typename Device,
          typename Index >
@@ -77,9 +78,30 @@ initInterface( const MeshFunctionType& input,
               MeshFunctionType& output,
               InterfaceMapType& interfaceMap )
{
    /*
     doplnit přepočty pro cudu:
     * overit is_same device
     * na kazdy bod jedno cuda vlakno
     */
    const MeshType& mesh = input.getMesh();
    typedef typename MeshType::Cell Cell;
    Cell cell( mesh );
    
    if( std::is_same< Device, Devices::Cuda >::value )
    {
#ifdef HAVE_CUDA
        const int cudaBlockSize( 16 );
        int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize );
        int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize );
        dim3 blockSize( cudaBlockSize, cudaBlockSize );
        dim3 gridSize( numBlocksX, numBlocksY );
        Devices::Cuda::synchronizeDevice();
        CudaInitCaller< Real, Device, Index ><<< gridSize, blockSize >>>( input, output, interfaceMap );
        TNL_CHECK_CUDA_DEVICE;
#endif
    }
    if( std::is_same< Device, Devices::Host >::value )
    {
        for( cell.getCoordinates().y() = 0;
             cell.getCoordinates().y() < mesh.getDimensions().y();
             cell.getCoordinates().y() ++ )
@@ -112,7 +134,7 @@ initInterface( const MeshFunctionType& input,
                const IndexType e = neighbors.template getEntityIndex<  1,  0 >();
                const IndexType n = neighbors.template getEntityIndex<  0,  1 >();
                //Try init with exact data:
            if( c * input[ n ] <= 0 )
                /*if( c * input[ n ] <= 0 )
                {
                    output[ cell.getIndex() ] = c;
                    output[ n ] = input[ n ];
@@ -125,39 +147,40 @@ initInterface( const MeshFunctionType& input,
                    output[ e ] = input[ e ];
                    interfaceMap[ cell.getIndex() ] = true;
                    interfaceMap[ e ] = true;
            }
            /*if( c * input[ n ] <= 0 )
            {
                if( c >= 0 )
                }*/
                if( c * input[ n ] <= 0 )
                {
                    pom = ( hy * c )/( c - input[ n ]);
                    if( output[ cell.getIndex() ] > pom ) 
                    /*if( c >= 0 )
                    {*/
                        pom = TNL::sign( c )*( hy * c )/( c - input[ n ]);
                        if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) 
                            output[ cell.getIndex() ] = pom;
                    if( output[ n ] < pom - hy )
                        output[ n ] = pom - hy; //( hy * c )/( c - input[ n ]) - hy;
                }else
                        pom = pom - TNL::sign( c )*hy;
                        if( TNL::abs( output[ n ] ) > TNL::abs( pom ) )
                            output[ n ] = pom; //( hy * c )/( c - input[ n ]) - hy;
                    /*}else
                    {
                        pom = - ( hy * c )/( c - input[ n ]);
                        if( output[ cell.getIndex() ] < pom )
                            output[ cell.getIndex() ] = pom;
                        if( output[ n ] > hy + pom )
                            output[ n ] = hy + pom; //hy - ( hy * c )/( c - input[ n ]);
                }
                    }*/
                    interfaceMap[ cell.getIndex() ] = true;
                    interfaceMap[ n ] = true;
                }
                if( c * input[ e ] <= 0 )
                {
                if( c >= 0 )
                {
                    pom = ( hx * c )/( c - input[ e ]);
                    if( output[ cell.getIndex() ] > pom )
                    /*if( c >= 0 )
                    {*/
                        pom = TNL::sign( c )*( hx * c )/( c - input[ e ]);
                        if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) )
                            output[ cell.getIndex() ] = pom;

                    pom = pom - hx; //output[ e ] = (hx * c)/( c - input[ e ]) - hx;
                    if( output[ e ] < pom )
                        pom = pom - TNL::sign( c )*hx; //output[ e ] = (hx * c)/( c - input[ e ]) - hx;
                        if( TNL::abs( output[ e ] ) > TNL::abs( pom ) )
                            output[ e ] = pom;                         
                }else
                    /*}else
                    {
                        pom = - (hx * c)/( c - input[ e ]);
                        if( output[ cell.getIndex() ] < pom )
@@ -166,10 +189,11 @@ initInterface( const MeshFunctionType& input,
                        pom = pom + hx; //output[ e ] = hx - (hx * c)/( c - input[ e ]);
                        if( output[ e ] > pom )
                            output[ e ] = pom;
                }
                    }*/
                    interfaceMap[ cell.getIndex() ] = true;
                    interfaceMap[ e ] = true;
            }*/
                }
             }
          }
      }
}
@@ -190,7 +214,7 @@ updateCell( MeshFunctionType& u,
   const RealType& hx = mesh.getSpaceSteps().x();
   const RealType& hy = mesh.getSpaceSteps().y();
   const RealType value = u( cell );
   RealType a, b, tmp1, tmp = TypeInfo< RealType >::getMaxValue();
   RealType a, b, tmp = TypeInfo< RealType >::getMaxValue();
   
   if( cell.getCoordinates().x() == 0 )
      a = u[ neighborEntities.template getEntityIndex< 1,  0 >() ];
@@ -538,7 +562,7 @@ T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double
}

template < typename T1 >
void sortMinims( T1 pom[])
__cuda_callable__ void sortMinims( T1 pom[])
{
    T1 tmp[6] = {0.0,0.0,0.0,0.0,0.0,0.0}; 
    if( fabs(pom[0]) <= fabs(pom[1]) && fabs(pom[1]) <= fabs(pom[2])){
@@ -572,3 +596,74 @@ void sortMinims( T1 pom[])
        pom[ i ] = tmp[ i ];
    }   
}

template < typename Real, typename Device, typename Index >
__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, 
                                Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output,
                                Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ) 
{
    int i = threadIdx.x + blockDim.x*blockIdx.x;
    int j = blockDim.y*blockIdx.y + threadIdx.y;
    const Meshes::Grid< 2, Real, Device, Index >& mesh = input.getMesh();
    
    if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() )
    {
        typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell;
        Cell cell( mesh );
        cell.getCoordinates().x() = i; cell.getCoordinates().y() = j;
        cell.refresh();


        output[ cell.getIndex() ] =
               input( cell ) >= 0 ? TypeInfo< Real >::getMaxValue() :
                                    - TypeInfo< Real >::getMaxValue();
        interfaceMap[ cell.getIndex() ] = false; 

        const Real& hx = mesh.getSpaceSteps().x();
        const Real& hy = mesh.getSpaceSteps().y();
        cell.refresh();
        const Real& c = input( cell );
        if( ! cell.isBoundaryEntity()  )
        {
           auto neighbors = cell.getNeighborEntities();
           Real pom = 0;
           const Index e = neighbors.template getEntityIndex<  1,  0 >();
           const Index w = neighbors.template getEntityIndex<  -1,  0 >();
           const Index n = neighbors.template getEntityIndex<  0,  1 >();
           const Index s = neighbors.template getEntityIndex<  0,  -1 >();
          
           if( c * input[ n ] <= 0 )
           {
               pom = TNL::sign( c )*( hy * c )/( c - input[ n ]);
               if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) 
                   output[ cell.getIndex() ] = pom;

               interfaceMap[ cell.getIndex() ] = true;
           }
           if( c * input[ e ] <= 0 )
           {
               pom = TNL::sign( c )*( hx * c )/( c - input[ e ]);
               if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) )
                   output[ cell.getIndex() ] = pom;                       

               interfaceMap[ cell.getIndex() ] = true;
           }
           if( c * input[ w ] <= 0 )
           {
               pom = TNL::sign( c )*( hx * c )/( c - input[ w ]);
               if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) 
                   output[ cell.getIndex() ] = pom;

               interfaceMap[ cell.getIndex() ] = true;
           }
           if( c * input[ s ] <= 0 )
           {
               pom = TNL::sign( c )*( hy * c )/( c - input[ s ]);
               if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) 
                   output[ cell.getIndex() ] = pom;

               interfaceMap[ cell.getIndex() ] = true;
           }
        }
    }
}
 No newline at end of file
+47 −2
Original line number Diff line number Diff line
@@ -64,7 +64,17 @@ solve( const MeshPointer& mesh,
   interfaceMap.setMesh( mesh );
   std::cout << "Initiating the interface cells ..." << std::endl;
   BaseType::initInterface( u, aux, interfaceMap );
   
   //if( std::is_same< DeviceType, Devices::Cuda >::value )
   //{
   //    Functions::MeshFunction< Meshes::Grid< 2, Real, TNL::Devices::Host, Index > > h_aux;
       //cudaMemcpy( h_aux, aux, sizeof(MeshFunctionType), cudaMemcpyDeviceToHost );
       //h_aux->save("aux-init-cuda.tnl");
   //}
   //if( std::is_same< DeviceType, Devices::Host >::value )
   {
       aux.save( "aux-ini.tnl" );
   }

   typename MeshType::Cell cell( *mesh );
   
@@ -207,10 +217,45 @@ solve( const MeshPointer& mesh,
      if( std::is_same< DeviceType, Devices::Cuda >::value )
      {
         // TODO: CUDA code
      }
          int numBlocks = 2;
          int threadsPerBlock;
          if( mesh->getDimensions().x() >= mesh->getDimensions().y() )
               threadsPerBlock = (int)( mesh->getDimensions().x() );
          else
               threadsPerBlock = (int)( mesh->getDimensions().y() );
          
          CudaUpdateCellCaller< Real, Device, Index ><<< numBlocks, threadsPerBlock >>>( interfaceMap, aux );
          cudaDeviceSynchronize(); //copak dela?
      }
      iteration++;
   }
   aux.save("aux-final.tnl");
}

template < typename Real, typename Device, typename Index >
__global__ void CudaUpdateCellCaller( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
                                      Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux )
{
    int i = threadIdx.x + blockDim.x*blockIdx.x;
    int j = threadIdx.y + blockDim.y*blockIdx.y;
    const Meshes::Grid< 2, Real, Device, Index >& mesh = aux.getMesh();
    
    if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() )
    {
        //make cell of aux from index
        typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell;
        Cell cell( mesh );
        cell.getCoordinates().x() = i; cell.getCoordinates().y() = j;
        cell.refresh();

        //update cell value few times 
        //for( int i = 0; i < mesh.getDimensions() ; i++ )
        //{
            cell.refresh();
            if( ! interfaceMap( cell ) )
            {
        //        tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >::updateCell( aux, cell );
            }
        //}
    }
}