Commit 5dfb1c1d authored by Vít Hanousek's avatar Vít Hanousek
Browse files

Introduction of CUDA-aware MPI support - basic grid mesh function works for 1D, 2D and 3D

parent 498019a4
Loading
Loading
Loading
Loading
+16 −16
Original line number Diff line number Diff line
@@ -21,17 +21,17 @@ namespace Communicators {
    {

        private:
        inline static MPI_Datatype MPIDataType( const signed char ) { return MPI_CHAR; };
        inline static MPI_Datatype MPIDataType( const signed short int ) { return MPI_SHORT; };
        inline static MPI_Datatype MPIDataType( const signed int ) { return MPI_INT; };
        inline static MPI_Datatype MPIDataType( const signed long int ) { return MPI_LONG; };
        inline static MPI_Datatype MPIDataType( const unsigned char ) { return MPI_UNSIGNED_CHAR; };
        inline static MPI_Datatype MPIDataType( const unsigned short int ) { return MPI_UNSIGNED_SHORT; };
        inline static MPI_Datatype MPIDataType( const unsigned int ) { return MPI_UNSIGNED; };
        inline static MPI_Datatype MPIDataType( const unsigned long int ) { return MPI_UNSIGNED_LONG; };
        inline static MPI_Datatype MPIDataType( const float ) { return MPI_FLOAT; };
        inline static MPI_Datatype MPIDataType( const double ) { return MPI_DOUBLE; };
        inline static MPI_Datatype MPIDataType( const long double ) { return MPI_LONG_DOUBLE; };
        inline static MPI_Datatype MPIDataType( const signed char* ) { return MPI_CHAR; };
        inline static MPI_Datatype MPIDataType( const signed short int* ) { return MPI_SHORT; };
        inline static MPI_Datatype MPIDataType( const signed int* ) { return MPI_INT; };
        inline static MPI_Datatype MPIDataType( const signed long int* ) { return MPI_LONG; };
        inline static MPI_Datatype MPIDataType( const unsigned char *) { return MPI_UNSIGNED_CHAR; };
        inline static MPI_Datatype MPIDataType( const unsigned short int* ) { return MPI_UNSIGNED_SHORT; };
        inline static MPI_Datatype MPIDataType( const unsigned int* ) { return MPI_UNSIGNED; };
        inline static MPI_Datatype MPIDataType( const unsigned long int* ) { return MPI_UNSIGNED_LONG; };
        inline static MPI_Datatype MPIDataType( const float* ) { return MPI_FLOAT; };
        inline static MPI_Datatype MPIDataType( const double* ) { return MPI_DOUBLE; };
        inline static MPI_Datatype MPIDataType( const long double* ) { return MPI_LONG_DOUBLE; };
        
        public:

@@ -80,14 +80,14 @@ namespace Communicators {
        template <typename T>
        static Request ISend( const T *data, int count, int dest)
        {
                return MPI::COMM_WORLD.Isend((void*) data, count, MPIDataType(*data) , dest, 0);
        };    
                return MPI::COMM_WORLD.Isend((void*) data, count, MPIDataType(data) , dest, 0);
        }    

        template <typename T>
        static Request IRecv( const T *data, int count, int src)
        {
                return MPI::COMM_WORLD.Irecv((void*) data, count, MPIDataType(*data) , src, 0);
        };
                return MPI::COMM_WORLD.Irecv((void*) data, count, MPIDataType(data) , src, 0);
        }

        static void WaitAll(Request *reqs, int length)
        {
@@ -98,7 +98,7 @@ namespace Communicators {
        static void Bcast(  T& data, int count, int root)
        {
                MPI::COMM_WORLD.Bcast((void*) &data, count,  MPIDataType(data), root);
        };
        }

      /*  template< typename T >
        static void Allreduce( T& data,
+3 −3
Original line number Diff line number Diff line
@@ -62,13 +62,13 @@ namespace Communicators {
        static Request ISend( const T *data, int count, int dest)
        {
            return 1;
        };    
        }    

        template <typename T>
        static Request IRecv( const T *data, int count, int src)
        {
            return 1;
        };
        }

        static void WaitAll(Request *reqs, int length)
        {
@@ -77,7 +77,7 @@ namespace Communicators {
        template< typename T > 
        static void Bcast(  T& data, int count, int root)
        {
        };
        }

       /* template< typename T >
        static void Allreduce( T& data,
+1 −0
Original line number Diff line number Diff line
@@ -547,6 +547,7 @@ boundLoad( File& file )
   return true;
}


template< typename Element,
          typename Device,
          typename Index >
+124 −0
Original line number Diff line number Diff line
/***************************************************************************
                          BufferEntitiesHelper.h  -  description
                             -------------------
    begin                : March 1, 2018
    copyright            : (C) 2018 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

#pragma once

#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/ParallelFor.h>

namespace TNL {
namespace Meshes { 
namespace DistributedMeshes { 


// Primary template of the helper that transfers mesh-function values between
// a mesh function and a flat buffer.
//
// The generic case has no members; the functionality lives in the partial
// specializations on the dimension parameter (1, 2 and 3 in this file).
// Using any other dimension yields a class without BufferEntities().
template< typename MeshFunctionType,
          int Dimension,
          typename RealType,
          typename Device >
class BufferEntitiesHelper
{};

//======================================== 1D ====================================================

// device-agnostic: the Device template parameter selects the ParallelFor backend
template< typename MeshFunctionType, typename RealType, typename Device >
class BufferEntitiesHelper< MeshFunctionType, 1, RealType, Device >
{
    public:
    // Transfers `sizex` cell values between a 1D mesh function and `buffer`.
    //
    //   meshFunction  mesh function whose data array is read or written
    //   buffer        flat array; element j corresponds to the cell with
    //                 x-coordinate beginx + j
    //   beginx        x-coordinate of the first transferred cell
    //   sizex         number of cells to transfer
    //   tobuffer      true:  mesh function -> buffer
    //                 false: buffer -> mesh function
    //
    // NOTE(review): both `buffer` and the mesh-function data are dereferenced
    // inside the lambda executed by ParallelFor< Device >, so presumably they
    // must be accessible from that device - confirm against callers.
    static void BufferEntities( MeshFunctionType meshFunction, RealType* buffer, int beginx, int sizex, bool tobuffer )
    {
        auto grid = meshFunction.getMesh();
        RealType* functionValues = meshFunction.getData().getData();

        // Everything is captured by value so that the lambda does not refer
        // to any local of this function.
        auto transferCell = [ tobuffer, grid, buffer, functionValues, beginx ] __cuda_callable__ ( int offset )
        {
            typename MeshFunctionType::MeshType::Cell cell( grid );
            cell.getCoordinates().x() = beginx + offset;
            cell.refresh();

            const auto cellIndex = cell.getIndex();
            if( tobuffer )
                buffer[ offset ] = functionValues[ cellIndex ];
            else
                functionValues[ cellIndex ] = buffer[ offset ];
        };

        ParallelFor< Device >::exec( 0, sizex, transferCell );
    }
};


//======================================== 2D ====================================================
template< typename MeshFunctionType, typename RealType, typename Device >
class BufferEntitiesHelper< MeshFunctionType, 2, RealType, Device >
{
    public:
    // Transfers a sizex-by-sizey rectangle of cell values between a 2D mesh
    // function and `buffer`.
    //
    //   meshFunction    mesh function whose data array is read or written
    //   buffer          flat array; the cell at ( beginx + j, beginy + i ) is
    //                   stored at position i * sizex + j (x varies fastest)
    //   beginx, beginy  coordinates of the first transferred cell
    //   sizex, sizey    extent of the rectangle in x and y
    //   tobuffer        true:  mesh function -> buffer
    //                   false: buffer -> mesh function
    //
    // NOTE(review): both `buffer` and the mesh-function data are dereferenced
    // inside the lambda executed by ParallelFor2D< Device >, so presumably
    // they must be accessible from that device - confirm against callers.
    static void BufferEntities( MeshFunctionType meshFunction, RealType* buffer, int beginx, int beginy, int sizex, int sizey, bool tobuffer )
    {
        auto grid = meshFunction.getMesh();
        RealType* functionValues = meshFunction.getData().getData();

        // Everything is captured by value so that the lambda does not refer
        // to any local of this function.
        auto transferCell = [ tobuffer, grid, buffer, functionValues, beginx, sizex, beginy ] __cuda_callable__ ( int row, int column )
        {
            typename MeshFunctionType::MeshType::Cell cell( grid );
            cell.getCoordinates().x() = beginx + column;
            cell.getCoordinates().y() = beginy + row;
            cell.refresh();

            const auto cellIndex = cell.getIndex();
            const int bufferIndex = row * sizex + column;
            if( tobuffer )
                buffer[ bufferIndex ] = functionValues[ cellIndex ];
            else
                functionValues[ cellIndex ] = buffer[ bufferIndex ];
        };

        // The first lambda argument is bounded by sizey, the second by sizex.
        ParallelFor2D< Device >::exec( 0, 0, sizey, sizex, transferCell );
    }
};


//======================================== 3D ====================================================
template< typename MeshFunctionType, typename RealType, typename Device >
class BufferEntitiesHelper< MeshFunctionType, 3, RealType, Device >
{
    public:
    // Transfers a sizex-by-sizey-by-sizez box of cell values between a 3D
    // mesh function and `buffer`.
    //
    //   meshFunction            mesh function whose data array is read or written
    //   buffer                  flat array; the cell at
    //                           ( beginx + j, beginy + i, beginz + k ) is stored
    //                           at position k * sizex * sizey + i * sizex + j
    //                           (x varies fastest, z slowest)
    //   beginx, beginy, beginz  coordinates of the first transferred cell
    //   sizex, sizey, sizez     extent of the box in x, y and z
    //   tobuffer                true:  mesh function -> buffer
    //                           false: buffer -> mesh function
    //
    // NOTE(review): both `buffer` and the mesh-function data are dereferenced
    // inside the lambda executed by ParallelFor3D< Device >, so presumably
    // they must be accessible from that device - confirm against callers.
    static void BufferEntities( MeshFunctionType meshFunction, RealType* buffer, int beginx, int beginy, int beginz, int sizex, int sizey, int sizez, bool tobuffer )
    {
        auto grid = meshFunction.getMesh();
        RealType* functionValues = meshFunction.getData().getData();

        // Everything is captured by value so that the lambda does not refer
        // to any local of this function.
        auto transferCell = [ tobuffer, grid, buffer, functionValues, beginx, sizex, beginy, sizey, beginz ] __cuda_callable__ ( int layer, int row, int column )
        {
            typename MeshFunctionType::MeshType::Cell cell( grid );
            cell.getCoordinates().x() = beginx + column;
            cell.getCoordinates().y() = beginy + row;
            cell.getCoordinates().z() = beginz + layer;
            cell.refresh();

            const auto cellIndex = cell.getIndex();
            const int bufferIndex = layer * sizex * sizey + row * sizex + column;
            if( tobuffer )
                buffer[ bufferIndex ] = functionValues[ cellIndex ];
            else
                functionValues[ cellIndex ] = buffer[ bufferIndex ];
        };

        // The lambda arguments are bounded by sizez, sizey and sizex, in this
        // order. A plain triple loop over ( layer, row, column ) calling the
        // lambda was previously kept here as a commented-out alternative.
        ParallelFor3D< Device >::exec( 0, 0, 0, sizez, sizey, sizex, transferCell );
    }
};


} // namespace DistributedMeshes
} // namespace Meshes
} // namespace TNL
+88 −217

File changed.

Preview size limit exceeded, changes collapsed.

Loading