Introduction of CUDA-aware-MPI support - basic grid-meshfunction works for 1D, 2D and 3D (5dfb1c1d) · Commits · TNL / tnl-dev

src/TNL/Communicators/MpiCommunicator.h

+16 −16

Original line number	Diff line number	Diff line
		@@ -21,17 +21,17 @@ namespace Communicators {
		{

		private:
		inline static MPI_Datatype MPIDataType( const signed char ) { return MPI_CHAR; };
		inline static MPI_Datatype MPIDataType( const signed short int ) { return MPI_SHORT; };
		inline static MPI_Datatype MPIDataType( const signed int ) { return MPI_INT; };
		inline static MPI_Datatype MPIDataType( const signed long int ) { return MPI_LONG; };
		inline static MPI_Datatype MPIDataType( const unsigned char ) { return MPI_UNSIGNED_CHAR; };
		inline static MPI_Datatype MPIDataType( const unsigned short int ) { return MPI_UNSIGNED_SHORT; };
		inline static MPI_Datatype MPIDataType( const unsigned int ) { return MPI_UNSIGNED; };
		inline static MPI_Datatype MPIDataType( const unsigned long int ) { return MPI_UNSIGNED_LONG; };
		inline static MPI_Datatype MPIDataType( const float ) { return MPI_FLOAT; };
		inline static MPI_Datatype MPIDataType( const double ) { return MPI_DOUBLE; };
		inline static MPI_Datatype MPIDataType( const long double ) { return MPI_LONG_DOUBLE; };
		inline static MPI_Datatype MPIDataType( const signed char* ) { return MPI_CHAR; };
		inline static MPI_Datatype MPIDataType( const signed short int* ) { return MPI_SHORT; };
		inline static MPI_Datatype MPIDataType( const signed int* ) { return MPI_INT; };
		inline static MPI_Datatype MPIDataType( const signed long int* ) { return MPI_LONG; };
		inline static MPI_Datatype MPIDataType( const unsigned char *) { return MPI_UNSIGNED_CHAR; };
		inline static MPI_Datatype MPIDataType( const unsigned short int* ) { return MPI_UNSIGNED_SHORT; };
		inline static MPI_Datatype MPIDataType( const unsigned int* ) { return MPI_UNSIGNED; };
		inline static MPI_Datatype MPIDataType( const unsigned long int* ) { return MPI_UNSIGNED_LONG; };
		inline static MPI_Datatype MPIDataType( const float* ) { return MPI_FLOAT; };
		inline static MPI_Datatype MPIDataType( const double* ) { return MPI_DOUBLE; };
		inline static MPI_Datatype MPIDataType( const long double* ) { return MPI_LONG_DOUBLE; };

		public:

		@@ -80,14 +80,14 @@ namespace Communicators {
		template <typename T>
		static Request ISend( const T *data, int count, int dest)
		{
		return MPI::COMM_WORLD.Isend((void*) data, count, MPIDataType(*data) , dest, 0);
		};
		return MPI::COMM_WORLD.Isend((void*) data, count, MPIDataType(data) , dest, 0);
		}

		template <typename T>
		static Request IRecv( const T *data, int count, int src)
		{
		return MPI::COMM_WORLD.Irecv((void*) data, count, MPIDataType(*data) , src, 0);
		};
		return MPI::COMM_WORLD.Irecv((void*) data, count, MPIDataType(data) , src, 0);
		}

		static void WaitAll(Request *reqs, int length)
		{
		@@ -98,7 +98,7 @@ namespace Communicators {
		static void Bcast( T& data, int count, int root)
		{
		MPI::COMM_WORLD.Bcast((void*) &data, count, MPIDataType(data), root);
		};
		}

		/* template< typename T >
		static void Allreduce( T& data,

src/TNL/Communicators/NoDistrCommunicator.h

+3 −3

Original line number	Diff line number	Diff line
		@@ -62,13 +62,13 @@ namespace Communicators {
		static Request ISend( const T *data, int count, int dest)
		{
		return 1;
		};
		}

		template <typename T>
		static Request IRecv( const T *data, int count, int src)
		{
		return 1;
		};
		}

		static void WaitAll(Request *reqs, int length)
		{
		@@ -77,7 +77,7 @@ namespace Communicators {
		template< typename T >
		static void Bcast( T& data, int count, int root)
		{
		};
		}

		/* template< typename T >
		static void Allreduce( T& data,

src/TNL/Containers/Array_impl.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -547,6 +547,7 @@ boundLoad( File& file )
		return true;
		}


		template< typename Element,
		typename Device,
		typename Index >

src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h

0 → 100644

+124 −0

Original line number	Diff line number	Diff line
		/***************************************************************************
		BufferEntitiesHelper.h - description
		-------------------
		begin : March 1, 2018
		copyright : (C) 2018 by Tomas Oberhuber
		email : tomas.oberhuber@fjfi.cvut.cz
		***************************************************************************/

		/* See Copyright Notice in tnl/Copyright */

		#pragma once

		#include <TNL/Devices/Host.h>
		#include <TNL/Devices/Cuda.h>
		#include <TNL/ParallelFor.h>

		namespace TNL {
		namespace Meshes {
		namespace DistributedMeshes {


		/**
		 * Primary template of the helper that moves mesh-function values between
		 * a block of grid cells and a linear buffer.
		 *
		 * The primary template is empty: BufferEntities() is provided only by the
		 * partial specializations for dim = 1, 2 and 3 defined in this file, so
		 * using any other dimension fails at the call site.
		 *
		 * Template parameters:
		 *   MeshFunctionType - type providing getMesh(), getData() and MeshType::Cell
		 *                      (as used by the specializations)
		 *   dim              - dimension selecting the specialization
		 *   RealType         - element type of the buffer
		 *   Device           - device tag forwarded to ParallelFor*< Device >
		 */
		template < typename MeshFunctionType,
		int dim,
		typename RealType,
		typename Device >
		class BufferEntitiesHelper
		{
		};

		//======================================== 1D ====================================================

		//generic over Device (dispatched through ParallelFor< Device >)
		/**
		 * 1D specialization: copies mesh-function values of a contiguous run of
		 * cells to a linear buffer, or from the buffer back to the cells.
		 */
		template < typename MeshFunctionType, typename RealType, typename Device >
		class BufferEntitiesHelper< MeshFunctionType, 1, RealType, Device >
		{
		   public:
		      /**
		       * \param meshFunction mesh function whose data are read or written
		       *                     (taken by value; data are accessed through the raw
		       *                     pointer returned by getData().getData())
		       * \param buffer       linear buffer; element `offset` pairs with the cell at
		       *                     x-coordinate beginx + offset
		       * \param beginx       x-coordinate of the first cell
		       * \param sizex        upper bound passed to ParallelFor (number of cells,
		       *                     presumably - confirm against ParallelFor.h)
		       * \param tobuffer     true: cells -> buffer, false: buffer -> cells
		       *
		       * NOTE(review): buffer and the mesh-function data presumably have to be
		       * accessible from Device - verify against the callers.
		       */
		      static void BufferEntities( MeshFunctionType meshFunction, RealType* buffer, int beginx, int sizex, bool tobuffer )
		      {
		         auto grid = meshFunction.getMesh();
		         RealType* values = meshFunction.getData().getData();

		         auto transfer = [ tobuffer, grid, buffer, values, beginx ] __cuda_callable__ ( int offset )
		         {
		            // Build the cell at the requested coordinate to obtain its index.
		            typename MeshFunctionType::MeshType::Cell cell( grid );
		            cell.getCoordinates().x() = beginx + offset;
		            cell.refresh();

		            if( ! tobuffer )
		               values[ cell.getIndex() ] = buffer[ offset ];
		            else
		               buffer[ offset ] = values[ cell.getIndex() ];
		         };

		         ParallelFor< Device >::exec( 0, sizex, transfer );
		      }
		};


		//======================================== 2D ====================================================
		/**
		 * 2D specialization: copies mesh-function values of a rectangular block of
		 * cells to a linear buffer, or from the buffer back to the cells.
		 */
		template < typename MeshFunctionType, typename RealType, typename Device >
		class BufferEntitiesHelper< MeshFunctionType, 2, RealType, Device >
		{
		   public:
		      /**
		       * \param meshFunction   mesh function whose data are read or written
		       *                       (taken by value; data are accessed through the raw
		       *                       pointer returned by getData().getData())
		       * \param buffer         linear buffer; the cell at
		       *                       ( beginx + col, beginy + row ) pairs with element
		       *                       row * sizex + col
		       * \param beginx, beginy coordinates of the first cell of the block
		       * \param sizex, sizey   upper bounds passed to ParallelFor2D (block extents,
		       *                       presumably - confirm against ParallelFor.h)
		       * \param tobuffer       true: cells -> buffer, false: buffer -> cells
		       *
		       * NOTE(review): buffer and the mesh-function data presumably have to be
		       * accessible from Device - verify against the callers.
		       */
		      static void BufferEntities( MeshFunctionType meshFunction, RealType* buffer, int beginx, int beginy, int sizex, int sizey, bool tobuffer )
		      {
		         auto grid = meshFunction.getMesh();
		         RealType* values = meshFunction.getData().getData();

		         auto transfer = [ tobuffer, grid, buffer, values, beginx, sizex, beginy ] __cuda_callable__ ( int row, int col )
		         {
		            // Build the cell at the requested coordinates to obtain its index.
		            typename MeshFunctionType::MeshType::Cell cell( grid );
		            cell.getCoordinates().x() = beginx + col;
		            cell.getCoordinates().y() = beginy + row;
		            cell.refresh();

		            // Position of this cell in the buffer, x running fastest.
		            const int slot = row * sizex + col;
		            if( ! tobuffer )
		               values[ cell.getIndex() ] = buffer[ slot ];
		            else
		               buffer[ slot ] = values[ cell.getIndex() ];
		         };

		         // First lambda argument is paired with the sizey bound, second with sizex.
		         ParallelFor2D< Device >::exec( 0, 0, sizey, sizex, transfer );
		      }
		};


		//======================================== 3D ====================================================
		/**
		 * 3D specialization: copies mesh-function values of a box of cells to a
		 * linear buffer, or from the buffer back to the cells.
		 */
		template < typename MeshFunctionType, typename RealType, typename Device >
		class BufferEntitiesHelper< MeshFunctionType, 3, RealType, Device >
		{
		   public:
		      /**
		       * \param meshFunction           mesh function whose data are read or written
		       *                               (taken by value; data are accessed through the
		       *                               raw pointer returned by getData().getData())
		       * \param buffer                 linear buffer; the cell at
		       *                               ( beginx + j, beginy + i, beginz + k ) pairs
		       *                               with element k * sizex * sizey + i * sizex + j
		       * \param beginx, beginy, beginz coordinates of the first cell of the box
		       * \param sizex, sizey, sizez    upper bounds passed to ParallelFor3D (box
		       *                               extents, presumably - confirm against
		       *                               ParallelFor.h)
		       * \param tobuffer               true: cells -> buffer, false: buffer -> cells
		       *
		       * NOTE(review): buffer and the mesh-function data presumably have to be
		       * accessible from Device - verify against the callers.
		       */
		      static void BufferEntities( MeshFunctionType meshFunction, RealType* buffer, int beginx, int beginy, int beginz, int sizex, int sizey, int sizez, bool tobuffer )
		      {
		         auto mesh = meshFunction.getMesh();
		         RealType* meshFunctionData = meshFunction.getData().getData();

		         auto kernel = [ tobuffer, mesh, buffer, meshFunctionData, beginx, sizex, beginy, sizey, beginz ] __cuda_callable__ ( int k, int i, int j )
		         {
		            // Build the cell at the requested coordinates to obtain its index.
		            typename MeshFunctionType::MeshType::Cell entity( mesh );
		            entity.getCoordinates().x() = beginx + j;
		            entity.getCoordinates().y() = beginy + i;
		            entity.getCoordinates().z() = beginz + k;
		            entity.refresh();

		            // Position of this cell in the buffer: x runs fastest, then y, then z
		            // (same layout as the 2D helper, extended by one z-slab of sizex * sizey).
		            const int bufferIndex = k * sizex * sizey + i * sizex + j;
		            if( tobuffer )
		               buffer[ bufferIndex ] = meshFunctionData[ entity.getIndex() ];
		            else
		               meshFunctionData[ entity.getIndex() ] = buffer[ bufferIndex ];
		         };

		         // Lambda arguments ( k, i, j ) are paired with the bounds ( sizez, sizey, sizex ).
		         ParallelFor3D< Device >::exec( 0, 0, 0, sizez, sizey, sizex, kernel );

		         /* Serial reference of the traversal above:
		         for( int k = 0; k < sizez; k++ )
		            for( int i = 0; i < sizey; i++ )
		               for( int j = 0; j < sizex; j++ )
		                  kernel( k, i, j );
		         */
		      }
		};


		} // namespace DistributedMeshes
		} // namespace Meshes
		} // namespace TNL

src/TNL/Meshes/DistributedMeshes/DistributedGridSynchronizer.h

+88 −217

File changed.

Preview size limit exceeded, changes collapsed.