Merge branch 'TO/reduction' into 'develop' (e465a5f3) · Commits · TNL / tnl-dev

Documentation/Examples/Containers/CMakeLists.txt

+20 −0

Original line number	Diff line number	Diff line
		@@ -8,18 +8,38 @@ set( COMMON_EXAMPLES
		VectorExample
		)

		# Examples that must be launched through mpirun.
		set( MPI_COMMON_EXAMPLES
		   DistributedArrayExample
		)

		# NOTE(review): this list ends with the path of the DistributedArrayTest binary,
		# so using it to launch an example would start that binary instead of the
		# example. It is left untouched in case code outside this section reads it;
		# the example rules below use mpi_example_parameters.
		SET( mpi_test_parameters -np 4 -H localhost:4 "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/DistributedArrayTest${CMAKE_EXECUTABLE_SUFFIX}" )

		# mpirun launcher options for the documentation examples.
		set( mpi_example_parameters -np 4 -H localhost:4 )

		# For every example, build the executable and register a rule which runs it
		# and redirects its standard output into the documentation snippets
		# directory. The declared outputs are collected in CUDA_OUTPUTS (CUDA build)
		# or HOST_OUTPUTS (host-only build).
		if( BUILD_CUDA )
		   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
		      cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
		      add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
		      list( APPEND CUDA_OUTPUTS ${target}.out )
		   endforeach()
		   if( BUILD_MPI )
		      foreach( target IN ITEMS ${MPI_COMMON_EXAMPLES} )
		         cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
		         # The example is an argument of mpirun rather than the command itself,
		         # so its location has to be requested explicitly via TARGET_FILE.
		         add_custom_command(
		            OUTPUT ${target}.out
		            COMMAND "mpirun" ${mpi_example_parameters} $<TARGET_FILE:${target}-cuda> > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out
		            DEPENDS ${target}-cuda )
		         list( APPEND CUDA_OUTPUTS ${target}.out )
		      endforeach()
		   endif()
		else()
		   foreach( target IN ITEMS ${HOST_EXAMPLES} )
		      add_executable( ${target} ${target}.cpp )
		      add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
		      list( APPEND HOST_OUTPUTS ${target}.out )
		   endforeach()
		   if( BUILD_MPI )
		      foreach( target IN ITEMS ${MPI_COMMON_EXAMPLES} )
		         add_executable( ${target} ${target}.cpp )
		         # Host build: launch the host executable (there is no -cuda target in
		         # this branch) and keep accumulating into HOST_OUTPUTS.
		         add_custom_command(
		            OUTPUT ${target}.out
		            COMMAND "mpirun" ${mpi_example_parameters} $<TARGET_FILE:${target}> > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out
		            DEPENDS ${target} )
		         list( APPEND HOST_OUTPUTS ${target}.out )
		      endforeach()
		   endif()
		endif()

		IF( BUILD_CUDA )

Documentation/Examples/Containers/DistributedArrayExample.cpp

0 → 100644

+48 −0

Original line number	Diff line number	Diff line
		#include <iostream>
		#include <TNL/Containers/Partitioner.h>
		#include <TNL/Containers/DistributedArray.h>
		#include <TNL/MPI/ScopedInitializer.h>

		using namespace TNL;
		using namespace std;

		/***
		* The following works for any device (CPU, GPU ...).
		*/
		template< typename Device >
		void distributedArrayExample()
		{
		using ArrayType = Containers::DistributedArray< int, Device >;
		using LocalArrayType = Containers::Array< int, Device >;
		using IndexType = typename ArrayType::IndexType;
		using LocalRangeType = typename ArrayType::LocalRangeType;

		const MPI_Comm group = TNL::MPI::AllGroup();
		//const int rank = TNL::MPI::GetRank(group);
		const int nproc = TNL::MPI::GetSize(group);

		/***
		* We set size to prime number to force non-uniform distribution of the distributed array.
		*/
		const int size = 97;
		const int ghosts = (nproc > 1) ? 4 : 0;

		const LocalRangeType localRange = Containers::Partitioner< IndexType >::splitRange( size, group );
		ArrayType a( localRange, ghosts, size, group );
		a.forElements( 0, size, [=] __cuda_callable__ ( const int idx, int& value ) { value = idx; } );
		//LocalArrayType localArray = a;
		//std::cout << a << std::endl;

		}

		/***
		 * Entry point: sets up MPI for the lifetime of the program and runs the
		 * example on the host and, when HAVE_CUDA is defined, on the GPU as well.
		 */
		int main( int argc, char* argv[] )
		{
		   // The scoped initializer lives until main returns.
		   TNL::MPI::ScopedInitializer mpiSession( argc, argv );

		   std::cout << "The first test runs on CPU ..." << std::endl;
		   distributedArrayExample< Devices::Host >();

		#ifdef HAVE_CUDA
		   std::cout << "The second test runs on GPU ..." << std::endl;
		   distributedArrayExample< Devices::Cuda >();
		#endif

		   return 0;
		}

Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp

+1 −1

Original line number	Diff line number	Diff line
		@@ -36,7 +36,7 @@ void getRowExample()
		/***
		* Compute the matrix trace.
		*/
		int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), fetch, std::plus<>{}, 0 );
		int trace = TNL::Algorithms::reduce< Device >( 0, matrix->getRows(), fetch, std::plus<>{}, 0 );
		std::cout << "Matrix trace is " << trace << "." << std::endl;
		}

Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp

+1 −1

Original line number	Diff line number	Diff line
		@@ -29,7 +29,7 @@ void getRowExample()
		return row.getValue( rowIdx );
		};

		int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix.getRows(), fetch, std::plus<>{}, 0 );
		int trace = TNL::Algorithms::reduce< Device >( 0, matrix.getRows(), fetch, std::plus<>{}, 0 );
		std::cout << "Matrix trace is " << trace << "." << std::endl;
		}

Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cpp

+1 −1

Original line number	Diff line number	Diff line
		@@ -41,7 +41,7 @@ void getRowExample()
		/***
		* Compute the matrix trace.
		*/
		int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), fetch, std::plus<>{}, 0 );
		int trace = TNL::Algorithms::reduce< Device >( 0, matrix->getRows(), fetch, std::plus<>{}, 0 );
		std::cout << "Matrix reads as: " << std::endl << *matrix << std::endl;
		std::cout << "Matrix trace is: " << trace << "." << std::endl;
		}