Added example on distributed array. (433d612b) · Commits · TNL / tnl-dev

Documentation/Examples/Containers/CMakeLists.txt

+20 −0

Original line number	Diff line number	Diff line
		@@ -8,18 +8,38 @@ set( COMMON_EXAMPLES
		VectorExample
		)

		# Examples that use distributed containers and are therefore launched
		# through mpirun instead of being executed directly.
		set( MPI_COMMON_EXAMPLES
		   DistributedArrayExample
		)

		# Launcher options only: the program to run is appended by each custom
		# command below. (Previously this list also carried the path to the
		# unrelated DistributedArrayTest binary, so mpirun started that test and
		# passed the example name to it as an argument.)
		set( mpi_test_parameters -np 4 -H localhost:4 )

		# Build every example and register a command that captures its standard
		# output as a snippet in ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}.
		# The registered outputs are accumulated in CUDA_OUTPUTS / HOST_OUTPUTS.
		if( BUILD_CUDA )
		   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
		      cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
		      add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
		      list( APPEND CUDA_OUTPUTS ${target}.out )
		   endforeach()
		   if( BUILD_MPI )
		      foreach( target IN ITEMS ${MPI_COMMON_EXAMPLES} )
		         cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
		         # A target name is replaced by the executable location only when it is
		         # the command itself; as an argument of mpirun it has to be spelled
		         # with the TARGET_FILE generator expression.
		         add_custom_command( COMMAND "mpirun" ${mpi_test_parameters} $<TARGET_FILE:${target}-cuda> > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
		         list( APPEND CUDA_OUTPUTS ${target}.out )
		      endforeach()
		   endif()
		else()
		   foreach( target IN ITEMS ${HOST_EXAMPLES} )
		      add_executable( ${target} ${target}.cpp )
		      add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
		      list( APPEND HOST_OUTPUTS ${target}.out )
		   endforeach()
		   if( BUILD_MPI )
		      foreach( target IN ITEMS ${MPI_COMMON_EXAMPLES} )
		         add_executable( ${target} ${target}.cpp )
		         # Host build: run the host executable created just above (there is no
		         # "-cuda" target in this branch) and keep accumulating HOST_OUTPUTS.
		         add_custom_command( COMMAND "mpirun" ${mpi_test_parameters} $<TARGET_FILE:${target}> > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
		         list( APPEND HOST_OUTPUTS ${target}.out )
		      endforeach()
		   endif()
		endif()

		IF( BUILD_CUDA )

Documentation/Examples/Containers/DistributedArrayExample.cpp

0 → 100644

+44 −0

Original line number	Diff line number	Diff line
		#include <iostream>
		#include <TNL/Containers/Partitioner.h>
		#include <TNL/Containers/DistributedArray.h>
		#include <TNL/MPI/ScopedInitializer.h>

		using namespace TNL;
		using namespace std;

		/***
		 * Creates a distributed array of integers on the given device.
		 *
		 * The following works for any device (CPU, GPU ...).
		 */
		template< typename Device >
		void distributedArrayExample()
		{
		   using ArrayType = Containers::DistributedArray< int, Device >;
		   using IndexType = typename ArrayType::IndexType;
		   using LocalRangeType = typename ArrayType::LocalRangeType;

		   // The array is distributed over all processes of the global MPI group.
		   const MPI_Comm group = TNL::MPI::AllGroup();
		   const int nproc = TNL::MPI::GetSize(group);

		   /***
		    * We set the size to a prime number to force a non-uniform distribution
		    * of the distributed array.
		    */
		   const int size = 97;
		   // Ghost elements are requested only when more than one process is running.
		   const int ghosts = (nproc > 1) ? 4 : 0;

		   // NOTE(review): splitRange presumably returns the sub-range of [0, size)
		   // owned by the calling process -- confirm against the Partitioner docs.
		   const LocalRangeType localRange = Containers::Partitioner< IndexType >::splitRange( size, group );
		   ArrayType a( localRange, ghosts, size, group );
		}

		int main( int argc, char* argv[] )
		{
		   // NOTE(review): presumably initializes MPI here and finalizes it when
		   // this object leaves scope -- confirm in TNL/MPI/ScopedInitializer.h.
		   TNL::MPI::ScopedInitializer mpiScope( argc, argv );

		   // The host variant is always run.
		   std::cout << "The first test runs on CPU ..." << std::endl;
		   distributedArrayExample< Devices::Host >();

		   // The GPU variant is compiled in only when HAVE_CUDA is defined.
		#ifdef HAVE_CUDA
		   std::cout << "The second test runs on GPU ..." << std::endl;
		   distributedArrayExample< Devices::Cuda >();
		#endif

		   return 0;
		}