Commit e465a5f3 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Merge branch 'TO/reduction' into 'develop'

To/reduction

See merge request !98
parents 4c1e8ae0 2d2309cd
Loading
Loading
Loading
Loading
+20 −0
Original line number Diff line number Diff line
@@ -8,18 +8,38 @@ set( COMMON_EXAMPLES
         VectorExample
)

# Examples that are launched through mpirun; the loops below process this list
# only when BUILD_MPI is set.
set( MPI_COMMON_EXAMPLES
         DistributedArrayExample
)

# Arguments placed between "mpirun" and the example executable in the custom
# commands below.
# NOTE(review): the list ends with the path of the DistributedArrayTest
# executable, so the example name appended after it is passed as an argument to
# that test binary rather than being launched itself - presumably copied from
# the unit tests; confirm this is intended.
SET( mpi_test_parameters -np 4 -H localhost:4 "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/DistributedArrayTest${CMAKE_EXECUTABLE_SUFFIX}" )

# Build every example and register a custom command that runs it and redirects
# its standard output to ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/<example>.out.
#
#  * With BUILD_CUDA, <example>.cu is compiled into <example>-cuda and the
#    produced outputs are collected in CUDA_OUTPUTS.
#  * Otherwise, <example>.cpp is compiled into <example> and the produced
#    outputs are collected in HOST_OUTPUTS.
#  * With BUILD_MPI, the examples listed in MPI_COMMON_EXAMPLES are built as
#    well and launched through mpirun with ${mpi_test_parameters}.
if( BUILD_CUDA )
   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
      cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
      add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
      list( APPEND CUDA_OUTPUTS ${target}.out )
   endforeach()
   # Test the variable by name (as done for BUILD_CUDA) so that an unset
   # BUILD_MPI does not expand to an empty condition.
   if( BUILD_MPI )
      foreach( target IN ITEMS ${MPI_COMMON_EXAMPLES} )
         cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
         # A target name is replaced by the executable location only when it is
         # the command itself, not when it is an argument of mpirun. Pass the
         # location explicitly and make the output depend on the example.
         add_custom_command( COMMAND "mpirun" ${mpi_test_parameters} $<TARGET_FILE:${target}-cuda> > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out
                             OUTPUT ${target}.out
                             DEPENDS ${target}-cuda )
         list( APPEND CUDA_OUTPUTS ${target}.out )
      endforeach()
   endif()
else()
   # NOTE(review): this loop iterates HOST_EXAMPLES while the CUDA branch
   # iterates COMMON_EXAMPLES - confirm that HOST_EXAMPLES is defined.
   foreach( target IN ITEMS ${HOST_EXAMPLES} )
      add_executable( ${target} ${target}.cpp )
      add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
      list( APPEND HOST_OUTPUTS ${target}.out )
   endforeach()
   if( BUILD_MPI )
      foreach( target IN ITEMS ${MPI_COMMON_EXAMPLES} )
         add_executable( ${target} ${target}.cpp )
         # The host build has no "-cuda" executable: run the target created
         # just above and record its output among the host outputs.
         add_custom_command( COMMAND "mpirun" ${mpi_test_parameters} $<TARGET_FILE:${target}> > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out
                             OUTPUT ${target}.out
                             DEPENDS ${target} )
         list( APPEND HOST_OUTPUTS ${target}.out )
      endforeach()
   endif()
endif()

IF( BUILD_CUDA )
+48 −0
Original line number Diff line number Diff line
#include <iostream>
#include <TNL/Containers/Partitioner.h>
#include <TNL/Containers/DistributedArray.h>
#include <TNL/MPI/ScopedInitializer.h>

using namespace TNL;
using namespace std;

/***
 * The following works for any device (CPU, GPU ...).
 */
template< typename Device >
void distributedArrayExample()
{
   using ArrayType = Containers::DistributedArray< int, Device >;
   using LocalArrayType = Containers::Array< int, Device >;
   using IndexType = typename ArrayType::IndexType;
   using LocalRangeType = typename ArrayType::LocalRangeType;

   const MPI_Comm group = TNL::MPI::AllGroup();
   //const int rank = TNL::MPI::GetRank(group);
   const int nproc = TNL::MPI::GetSize(group);

   /***
    * We set size to prime number to force non-uniform distribution of the distributed array.
    */
   const int size = 97;
   const int ghosts = (nproc > 1) ? 4 : 0;

   const LocalRangeType localRange = Containers::Partitioner< IndexType >::splitRange( size, group );
   ArrayType a( localRange, ghosts, size, group );
   a.forElements( 0, size, [=] __cuda_callable__ ( const int idx, int& value ) { value = idx; } );
   //LocalArrayType localArray = a;
   //std::cout << a << std::endl;

}

/***
 * Runs the distributed array example on the host and, when the code is
 * compiled with HAVE_CUDA defined, on the CUDA device as well.
 */
int main( int argc, char* argv[] )
{
   // Scoped MPI initializer constructed from the command-line arguments.
   TNL::MPI::ScopedInitializer mpiScope( argc, argv );

   std::cout << "The first test runs on CPU ..." << std::endl;
   distributedArrayExample< Devices::Host >();

#if defined( HAVE_CUDA )
   std::cout << "The second test runs on GPU ..." << std::endl;
   distributedArrayExample< Devices::Cuda >();
#endif

   return 0;
}
+1 −1
Original line number Diff line number Diff line
@@ -36,7 +36,7 @@ void getRowExample()
   /***
    * Compute the matrix trace.
    */
   int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), fetch, std::plus<>{}, 0 );
   int trace = TNL::Algorithms::reduce< Device >( 0, matrix->getRows(), fetch, std::plus<>{}, 0 );
   std::cout << "Matrix trace is " << trace << "." << std::endl;
}

+1 −1
Original line number Diff line number Diff line
@@ -29,7 +29,7 @@ void getRowExample()
      return row.getValue( rowIdx );
   };

   int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix.getRows(), fetch, std::plus<>{}, 0 );
   int trace = TNL::Algorithms::reduce< Device >( 0, matrix.getRows(), fetch, std::plus<>{}, 0 );
   std::cout << "Matrix trace is " << trace << "." << std::endl;
}

+1 −1
Original line number Diff line number Diff line
@@ -41,7 +41,7 @@ void getRowExample()
   /***
    * Compute the matrix trace.
    */
   int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), fetch, std::plus<>{}, 0 );
   int trace = TNL::Algorithms::reduce< Device >( 0, matrix->getRows(), fetch, std::plus<>{}, 0 );
   std::cout << "Matrix reads as: " << std::endl << *matrix << std::endl;
   std::cout << "Matrix trace is: " << trace << "." << std::endl;
}
Loading