# Documentation/Examples/Containers/CMakeLists.txt -- diff hunk @@ -8,18 +8,38 @@
#
# Builds the container examples and registers custom commands that redirect
# each example's standard output into
# ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/<example>.out.

# NOTE(review): the hunk begins inside this list; entries preceding
# VectorExample may exist above the visible region -- confirm against the
# full file before relying on this definition.
set( COMMON_EXAMPLES
   VectorExample
)

# Examples that are launched through mpirun when BUILD_MPI is enabled.
set( MPI_COMMON_EXAMPLES
   DistributedArrayExample
)

# Launcher arguments placed between "mpirun" and the example executable.
# This list must not name an executable itself: whatever follows these options
# is what mpirun starts, and each custom command below appends its own example.
set( mpi_test_parameters -np 4 -H localhost:4 )

if( BUILD_CUDA )
   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
      cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
      # NOTE(review): the declared OUTPUT is relative to the current binary
      # directory while the file is written to the snippets path -- confirm
      # that the consumers of CUDA_OUTPUTS expect this.
      add_custom_command( OUTPUT ${target}.out
                          COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out )
      list( APPEND CUDA_OUTPUTS ${target}.out )
   endforeach()

   if( BUILD_MPI )
      foreach( target IN ITEMS ${MPI_COMMON_EXAMPLES} )
         cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
         # A target name is replaced by its location only when it is the
         # command itself; as an argument of mpirun it has to be spelled with
         # $<TARGET_FILE:...>. DEPENDS re-runs the command when the example
         # is rebuilt.
         add_custom_command( OUTPUT ${target}.out
                             COMMAND mpirun ${mpi_test_parameters} $<TARGET_FILE:${target}-cuda> > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out
                             DEPENDS ${target}-cuda )
         list( APPEND CUDA_OUTPUTS ${target}.out )
      endforeach()
   endif()
else()
   # NOTE(review): HOST_EXAMPLES is not defined in the visible hunk (the CUDA
   # branch iterates COMMON_EXAMPLES) -- verify that it is set earlier.
   foreach( target IN ITEMS ${HOST_EXAMPLES} )
      add_executable( ${target} ${target}.cpp )
      add_custom_command( OUTPUT ${target}.out
                          COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out )
      list( APPEND HOST_OUTPUTS ${target}.out )
   endforeach()

   if( BUILD_MPI )
      foreach( target IN ITEMS ${MPI_COMMON_EXAMPLES} )
         # The host build creates the plain "${target}" executable, so that is
         # the binary handed to mpirun (there is no "-cuda" target here).
         add_executable( ${target} ${target}.cpp )
         add_custom_command( OUTPUT ${target}.out
                             COMMAND mpirun ${mpi_test_parameters} $<TARGET_FILE:${target}> > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out
                             DEPENDS ${target} )
         # Append to the host list; the outputs collected above must be kept.
         list( APPEND HOST_OUTPUTS ${target}.out )
      endforeach()
   endif()
endif()

# The body of the following block lies outside the visible hunk.
IF( BUILD_CUDA )

// Documentation/Examples/Containers/DistributedArrayExample.cpp -- new file, mode 100644 (+44 -0)
#include <iostream>
#include <TNL/Containers/Partitioner.h>
#include <TNL/Containers/DistributedArray.h>
#include <TNL/MPI/ScopedInitializer.h>

using namespace TNL;

/***
 * Constructs a distributed array of 97 integers on the given device.
 *
 * The global index range is split among the processes of the communicator
 * returned by TNL::MPI::AllGroup(). Four ghost elements are requested when
 * more than one process takes part, none otherwise.
 *
 * The following works for any device (CPU, GPU ...).
 */
template< typename Device >
void distributedArrayExample()
{
   using ArrayType = Containers::DistributedArray< int, Device >;
   using IndexType = typename ArrayType::IndexType;
   using LocalRangeType = typename ArrayType::LocalRangeType;

   const MPI_Comm group = TNL::MPI::AllGroup();
   const int nproc = TNL::MPI::GetSize( group );

   /***
    * We set the size to a prime number to force a non-uniform distribution
    * of the distributed array.
    */
   const int size = 97;
   const int ghosts = ( nproc > 1 ) ? 4 : 0;

   const LocalRangeType localRange = Containers::Partitioner< IndexType >::splitRange( size, group );
   ArrayType a( localRange, ghosts, size, group );
}

/***
 * Runs the example on the host and, when compiled with HAVE_CUDA, on the GPU.
 */
int main( int argc, char* argv[] )
{
   // Presumably initializes MPI for the lifetime of this object -- see
   // TNL/MPI/ScopedInitializer.h.
   TNL::MPI::ScopedInitializer mpi( argc, argv );

   std::cout << "The first test runs on CPU ..." << std::endl;
   distributedArrayExample< Devices::Host >();

#ifdef HAVE_CUDA
   std::cout << "The second test runs on GPU ..." << std::endl;
   distributedArrayExample< Devices::Cuda >();
#endif
}
# Documentation/Examples/Containers/CMakeLists.txt -- diff hunk @@ -8,18 +8,38 @@
#
# Builds the container examples and registers custom commands that redirect
# each example's standard output into
# ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/<example>.out.

# NOTE(review): the hunk begins inside this list; entries preceding
# VectorExample may exist above the visible region -- confirm against the
# full file before relying on this definition.
set( COMMON_EXAMPLES
   VectorExample
)

# Examples that are launched through mpirun when BUILD_MPI is enabled.
set( MPI_COMMON_EXAMPLES
   DistributedArrayExample
)

# Launcher arguments placed between "mpirun" and the example executable.
# This list must not name an executable itself: whatever follows these options
# is what mpirun starts, and each custom command below appends its own example.
set( mpi_test_parameters -np 4 -H localhost:4 )

if( BUILD_CUDA )
   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
      cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
      # NOTE(review): the declared OUTPUT is relative to the current binary
      # directory while the file is written to the snippets path -- confirm
      # that the consumers of CUDA_OUTPUTS expect this.
      add_custom_command( OUTPUT ${target}.out
                          COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out )
      list( APPEND CUDA_OUTPUTS ${target}.out )
   endforeach()

   if( BUILD_MPI )
      foreach( target IN ITEMS ${MPI_COMMON_EXAMPLES} )
         cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
         # A target name is replaced by its location only when it is the
         # command itself; as an argument of mpirun it has to be spelled with
         # $<TARGET_FILE:...>. DEPENDS re-runs the command when the example
         # is rebuilt.
         add_custom_command( OUTPUT ${target}.out
                             COMMAND mpirun ${mpi_test_parameters} $<TARGET_FILE:${target}-cuda> > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out
                             DEPENDS ${target}-cuda )
         list( APPEND CUDA_OUTPUTS ${target}.out )
      endforeach()
   endif()
else()
   # NOTE(review): HOST_EXAMPLES is not defined in the visible hunk (the CUDA
   # branch iterates COMMON_EXAMPLES) -- verify that it is set earlier.
   foreach( target IN ITEMS ${HOST_EXAMPLES} )
      add_executable( ${target} ${target}.cpp )
      add_custom_command( OUTPUT ${target}.out
                          COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out )
      list( APPEND HOST_OUTPUTS ${target}.out )
   endforeach()

   if( BUILD_MPI )
      foreach( target IN ITEMS ${MPI_COMMON_EXAMPLES} )
         # The host build creates the plain "${target}" executable, so that is
         # the binary handed to mpirun (there is no "-cuda" target here).
         add_executable( ${target} ${target}.cpp )
         add_custom_command( OUTPUT ${target}.out
                             COMMAND mpirun ${mpi_test_parameters} $<TARGET_FILE:${target}> > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out
                             DEPENDS ${target} )
         # Append to the host list; the outputs collected above must be kept.
         list( APPEND HOST_OUTPUTS ${target}.out )
      endforeach()
   endif()
endif()

# The body of the following block lies outside the visible hunk.
IF( BUILD_CUDA )
Documentation/Examples/Containers/DistributedArrayExample.cpp 0 → 100644 +44 −0 Original line number Diff line number Diff line #include <iostream> #include <TNL/Containers/Partitioner.h> #include <TNL/Containers/DistributedArray.h> #include <TNL/MPI/ScopedInitializer.h> using namespace TNL; using namespace std; /*** * The following works for any device (CPU, GPU ...). */ template< typename Device > void distributedArrayExample() { using ArrayType = Containers::DistributedArray< int, Device >; using IndexType = typename ArrayType::IndexType; using LocalRangeType = typename ArrayType::LocalRangeType; const MPI_Comm group = TNL::MPI::AllGroup(); const int rank = TNL::MPI::GetRank(group); const int nproc = TNL::MPI::GetSize(group); /*** * We set size to prime number to force non-uniform distribution of the distributed array. */ const int size = 97; const int ghosts = (nproc > 1) ? 4 : 0; const LocalRangeType localRange = Containers::Partitioner< IndexType >::splitRange( size, group ); ArrayType a( localRange, ghosts, size, group ); } int main( int argc, char* argv[] ) { TNL::MPI::ScopedInitializer mpi(argc, argv); std::cout << "The first test runs on CPU ..." << std::endl; distributedArrayExample< Devices::Host >(); #ifdef HAVE_CUDA std::cout << "The second test runs on GPU ..." << std::endl; distributedArrayExample< Devices::Cuda >(); #endif }