Commit db36cd42 authored by Matouš Fencl
Browse files

CUDA init problem not solved

parent 092c2538
Loading
Loading
Loading
Loading
+6 −2
Original line number Diff line number Diff line
@@ -104,13 +104,17 @@ template < typename T1 >
__cuda_callable__ void sortMinims( T1 pom[] );


#ifdef HAVE_CUDA
template < typename Real, typename Device, typename Index >
__global__ void CudaUpdateCellCaller( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
                                      Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux );

template < typename Real, typename Device, typename Index >
/*template < typename Real, typename Device, typename Index >
__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, 
                                Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output,
                                Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap );
                                Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap );*/

__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, double, TNL::Devices::Cuda, int > >& input );
#endif

#include "tnlDirectEikonalMethodsBase_impl.h"
+21 −7
Original line number Diff line number Diff line
@@ -83,25 +83,30 @@ initInterface( const MeshFunctionType& input,
     * verify is_same device
     * one CUDA thread per point
     */
    const MeshType& mesh = input.getMesh();
    typedef typename MeshType::Cell Cell;
    Cell cell( mesh );
    
        
    if( std::is_same< Device, Devices::Cuda >::value )
    {
#ifdef HAVE_CUDA
        const MeshType& mesh = input.getMesh();
        
        const int cudaBlockSize( 16 );
        int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize );
        int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize );
        dim3 blockSize( cudaBlockSize, cudaBlockSize );
        dim3 gridSize( numBlocksX, numBlocksY );
        Devices::Cuda::synchronizeDevice();
        CudaInitCaller< Real, Device, Index ><<< gridSize, blockSize >>>( input, output, interfaceMap );
        //CudaInitCaller< Real, Device, Index ><<< gridSize, blockSize >>>( input, output, interfaceMap );
        CudaInitCaller<<< gridSize, blockSize >>>( input );
        cudaDeviceSynchronize();
        TNL_CHECK_CUDA_DEVICE;
#endif
    }
    if( std::is_same< Device, Devices::Host >::value )
    {
        const MeshType& mesh = input.getMesh();
        typedef typename MeshType::Cell Cell;
        Cell cell( mesh );
        for( cell.getCoordinates().y() = 0;
             cell.getCoordinates().y() < mesh.getDimensions().y();
             cell.getCoordinates().y() ++ )
@@ -597,7 +602,7 @@ __cuda_callable__ void sortMinims( T1 pom[])
    }   
}

template < typename Real, typename Device, typename Index >
/*template < typename Real, typename Device, typename Index >
__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, 
                                Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output,
                                Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ) 
@@ -606,7 +611,7 @@ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2,
    int j = blockDim.y*blockIdx.y + threadIdx.y;
    const Meshes::Grid< 2, Real, Device, Index >& mesh = input.getMesh();
    
    if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() )
    //if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() )
    {
        typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell;
        Cell cell( mesh );
@@ -666,4 +671,13 @@ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2,
           }
        }
    }
}*/


/**
 * Debugging stub of the interface-initialisation kernel.
 *
 * Each thread derives its global 2D coordinates from the block and thread
 * indices and then returns; nothing is read from or written to `input`.
 * The mesh lookup is kept commented out.
 *
 * NOTE(review): `input` is taken by reference in a __global__ function.
 * Presumably it refers to a host-side object, so dereferencing it in device
 * code would be invalid - confirm before re-enabling the mesh access.
 *
 * \param input mesh function on a 2D grid (double, CUDA device, int index);
 *              currently unused.
 */
__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, double, TNL::Devices::Cuda, int > >& input )
{
    // Global thread coordinates in the x and y directions (computed but not used yet).
    const int col = blockIdx.x*blockDim.x + threadIdx.x;
    const int row = blockIdx.y*blockDim.y + threadIdx.y;

    // Mesh access is disabled for now:
    //const Meshes::Grid< 2, double, TNL::Devices::Cuda, int >& mesh = input.getMesh();
}
+7 −13
Original line number Diff line number Diff line
@@ -64,17 +64,9 @@ solve( const MeshPointer& mesh,
   interfaceMap.setMesh( mesh );
   std::cout << "Initiating the interface cells ..." << std::endl;
   BaseType::initInterface( u, aux, interfaceMap );
   cudaDeviceSynchronize();
        
   //if( std::is_same< DeviceType, Devices::Cuda >::value )
   //{
   //    Functions::MeshFunction< Meshes::Grid< 2, Real, TNL::Devices::Host, Index > > h_aux;
       //cudaMemcpy( h_aux, aux, sizeof(MeshFunctionType), cudaMemcpyDeviceToHost );
       //h_aux->save("aux-init-cuda.tnl");
   //}
   //if( std::is_same< DeviceType, Devices::Host >::value )
   {
   aux.save( "aux-ini.tnl" );
   }

   typename MeshType::Cell cell( *mesh );
   
@@ -217,7 +209,8 @@ solve( const MeshPointer& mesh,
      if( std::is_same< DeviceType, Devices::Cuda >::value )
      {
         // TODO: CUDA code
          int numBlocks = 2;
#ifdef HAVE_CUDA
          /*int numBlocks = 2;
          int threadsPerBlock;
          if( mesh->getDimensions().x() >= mesh->getDimensions().y() )
               threadsPerBlock = (int)( mesh->getDimensions().x() );
@@ -225,7 +218,8 @@ solve( const MeshPointer& mesh,
               threadsPerBlock = (int)( mesh->getDimensions().y() );
          
          CudaUpdateCellCaller< Real, Device, Index ><<< numBlocks, threadsPerBlock >>>( interfaceMap, aux );
          cudaDeviceSynchronize(); //copak dela?
          cudaDeviceSynchronize(); //copak dela?*/
#endif
      }
      iteration++;
   }