Commit b3a8feac authored by Jakub Klinkovský
Browse files

Merge branch 'JK/scan' into 'develop'

Scan refactoring

Closes #87

See merge request !100
parents 0d735ef4 d9af4a61
Loading
Loading
Loading
Loading
+24 −4
Original line number Diff line number Diff line
# Build every example and register a custom command that runs it, redirecting
# its standard output into ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/<name>.out.
# With BUILD_CUDA enabled the .cu sources are built through CUDA_ADD_EXECUTABLE
# (targets carry a "Cuda" suffix); otherwise the .cpp sources are built through
# ADD_EXECUTABLE.
#
# Each target and each OUTPUT is declared exactly once per branch: CMake refuses
# a second ADD_EXECUTABLE with an existing target name and a second custom rule
# for the same output file.
#
# NOTE(review): the declared OUTPUT is a relative name, which CMake resolves
# against the current binary directory, while the command writes into
# TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH. If those directories differ, the
# declared output is never created and the commands re-run on every build --
# confirm this is intended.
IF( BUILD_CUDA )
   CUDA_ADD_EXECUTABLE( SortingExampleCuda SortingExample.cu)
   ADD_CUSTOM_COMMAND( COMMAND SortingExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out )

   CUDA_ADD_EXECUTABLE( SortingExample2Cuda SortingExample2.cu)
   ADD_CUSTOM_COMMAND( COMMAND SortingExample2Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out )

   CUDA_ADD_EXECUTABLE( SortingExample3Cuda SortingExample3.cu)
   ADD_CUSTOM_COMMAND( COMMAND SortingExample3Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out )

   CUDA_ADD_EXECUTABLE(ParallelForExampleCuda ParallelForExample.cu)
   ADD_CUSTOM_COMMAND( COMMAND ParallelForExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out )

   CUDA_ADD_EXECUTABLE(reduceArrayExampleCuda reduceArrayExample.cu)
   ADD_CUSTOM_COMMAND( COMMAND reduceArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out OUTPUT reduceArrayExample.out )

   CUDA_ADD_EXECUTABLE(reduceWithArgumentArrayExampleCuda reduceWithArgumentArrayExample.cu)
   ADD_CUSTOM_COMMAND( COMMAND reduceWithArgumentArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceWithArgumentArrayExample.out OUTPUT reduceWithArgumentArrayExample.out )
ELSE()
   ADD_EXECUTABLE( SortingExample SortingExample.cpp)
   ADD_CUSTOM_COMMAND( COMMAND SortingExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out )

   ADD_EXECUTABLE( SortingExample2 SortingExample2.cpp)
   ADD_CUSTOM_COMMAND( COMMAND SortingExample2 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out )

   ADD_EXECUTABLE( SortingExample3 SortingExample3.cpp)
   ADD_CUSTOM_COMMAND( COMMAND SortingExample3 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out )

   ADD_EXECUTABLE(ParallelForExample ParallelForExample.cpp)
   ADD_CUSTOM_COMMAND( COMMAND ParallelForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out )

   ADD_EXECUTABLE(reduceArrayExample reduceArrayExample.cpp)
   ADD_CUSTOM_COMMAND( COMMAND reduceArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out OUTPUT reduceArrayExample.out )

   ADD_EXECUTABLE(reduceWithArgumentArrayExample reduceWithArgumentArrayExample.cpp)
   ADD_CUSTOM_COMMAND( COMMAND reduceWithArgumentArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceWithArgumentArrayExample.out OUTPUT reduceWithArgumentArrayExample.out )
ENDIF()

ADD_EXECUTABLE(staticForExample staticForExample.cpp)
@@ -29,6 +47,8 @@ ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS
   SortingExample2.out
   SortingExample3.out
   ParallelForExample.out
   reduceArrayExample.out
   reduceWithArgumentArrayExample.out
   unrolledForExample.out
   staticForExample.out
)
+12 −9
Original line number Diff line number Diff line
#include <iostream>
#include <functional>
#include <TNL/Containers/Array.h>
#include <TNL/Containers/ArrayView.h>
#include <TNL/Algorithms/reduce.h>

using namespace TNL;

template< typename Device >
void reduceElementsExample()
void reduceArrayExample()
{
   /****
    * Create new arrays
@@ -22,23 +20,28 @@ void reduceElementsExample()
   /****
    * Sum all elements of array `a`
    */
   auto fetch = [=] __cuda_callable__ ( int i, float& value ) { return value; };
   auto sum = a.reduceEachElement( fetch, std::plus<>{}, 0.0 );
   float sum_total = Algorithms::reduce( a, TNL::Plus{} );

   /****
    * Sum last 5 elements of array `a`
    */
   float sum_last_five = Algorithms::reduce( a.getConstView( 5, 10 ), TNL::Plus{} );

   /****
    * Print the results
    */
   std::cout << " a = " << a << std::endl;
   std::cout << " sum = " << sum << std::endl;
   std::cout << " sum of all elements = " << sum_total << std::endl;
   std::cout << " sum of last 5 elements = " << sum_last_five << std::endl;
}

/****
 * Entry point: runs the reduction example on the host and, when the
 * HAVE_CUDA macro is defined, on the CUDA device as well.
 *
 * Only `reduceArrayExample` is invoked; the calls to the former name
 * `reduceElementsExample` were stale leftovers of the rename.
 */
int main( int argc, char* argv[] )
{
   std::cout << "Running example on the host system: " << std::endl;
   reduceArrayExample< Devices::Host >();

#ifdef HAVE_CUDA
   std::cout << "Running example on the CUDA device: " << std::endl;
   reduceArrayExample< Devices::Cuda >();
#endif
}
+1 −0
Original line number Diff line number Diff line
reduceArrayExample.cpp
 No newline at end of file
+41 −0
Original line number Diff line number Diff line
#include <iostream>
#include <functional>
#include <TNL/Containers/Array.h>
#include <TNL/Containers/ArrayView.h>
#include <TNL/Containers/Vector.h>
#include <TNL/Algorithms/reduce.h>

using namespace TNL;

template< typename Device >
void reduceElementsExample()
void reduceArrayExample()
{
   /****
    * Create new arrays
    */
   const int size = 10;
   Containers::Array< float, Device > a( size );
   auto a_view = a.getView();
   Containers::Vector< float, Device > a( size );

   /****
    * Initiate the elements of array `a`
    */
   a_view.forAllElements( [] __cuda_callable__ ( int i, float& value ) { value = i; } );
   a.forAllElements( [] __cuda_callable__ ( int i, float& value ) { value = 3 - i; } );

   /****
    * Sum all elements of array `a`
    * Reduce all elements of array `a`
    */
   auto fetch = [=] __cuda_callable__ ( int i, float& value ) { return value; };
   auto sum = a_view.reduceEachElement( fetch, std::plus<>{}, 0.0 );
   std::pair< float, int > result_total = Algorithms::reduceWithArgument( TNL::abs( a ), TNL::MaxWithArg{} );

   /****
    * Print the results
    */
   std::cout << " a = " << a << std::endl;
   std::cout << " sum = " << sum << std::endl;
   std::cout << " abs-max of all elements = " << result_total.first << " at position " << result_total.second << std::endl;
}

/****
 * Entry point: runs the reduction-with-argument example on the host and,
 * when the HAVE_CUDA macro is defined, on the CUDA device as well.
 *
 * Only `reduceArrayExample` is invoked; the calls to the former name
 * `reduceElementsExample` were stale leftovers of the rename.
 */
int main( int argc, char* argv[] )
{
   std::cout << "Running example on the host system: " << std::endl;
   reduceArrayExample< Devices::Host >();

#ifdef HAVE_CUDA
   std::cout << "Running example on the CUDA device: " << std::endl;
   reduceArrayExample< Devices::Cuda >();
#endif
}
+1 −0
Original line number Diff line number Diff line
reduceWithArgumentArrayExample.cpp
 No newline at end of file
Loading