# NOTE: the lines below are a diff viewer's single-stream rendering of one change set. Hunks of
# CMakeLists.txt, Documentation/Examples/Algorithms/CMakeLists.txt, SortingExample.cpp,
# SortingExample.cu and SortingExample2.cpp are concatenated, and the line breaks do not follow
# statement boundaries.
# NOTE(review): the cache entry is declared as CUDA_SAMPLES_PATH, but the condition and the -I flag
# read CUDA_SAMPLES_DIR, which is not set anywhere in the visible hunks - confirm the intended name.
Loading CMakeLists.txt +5 −0 Original line number Diff line number Diff line Loading @@ -32,6 +32,7 @@ option(BUILD_TESTS "Build tests" OFF) option(BUILD_MATRIX_TESTS "Build tests for matrix formats" OFF) option(BUILD_PYTHON "Compile the Python bindings" OFF) option(BUILD_DOC "Build examples included in the documentation" OFF) set(CUDA_SAMPLES_PATH "none" CACHE STRING "Path to CUDA Samples - it is used only for some benchmarking.") # install paths relative to the cmake's prefix set( TNL_TARGET_INCLUDE_DIRECTORY "include/TNL" ) Loading Loading @@ -253,6 +254,9 @@ if( ${WITH_CUDA} ) endif() set( CMAKE_EXECUTABLE_SUFFIX "${executable_suffix_backup}" ) endif() if( NOT CUDA_SAMPLES_DIR STREQUAL "none" ) set( CUDA_SAMPLES_FLAGS "-I${CUDA_SAMPLES_DIR} -DHAVE_CUDA_SAMPLES") endif() endif() Loading Loading @@ -407,6 +411,7 @@ message( " CMAKE_SHARED_LINKER_FLAGS = ${CMAKE_SHARED_LINKER_FLAGS}" ) message( " CMAKE_SHARED_LINKER_FLAGS_DEBUG = ${CMAKE_SHARED_LINKER_FLAGS_DEBUG}" ) message( " CMAKE_SHARED_LINKER_FLAGS_RELEASE = ${CMAKE_SHARED_LINKER_FLAGS_RELEASE}" ) message( " CUDA_NVCC_FLAGS = ${CUDA_NVCC_FLAGS}" ) message( " CUDA_SAMPLES_FLAGS = ${CUDA_SAMPLES_FLAGS}" ) message( " GMP_LIBRARIES = ${GMP_LIBRARIES}" ) if( MPI_CXX_FOUND AND ${WITH_MPI} ) message( " MPI_CXX_COMPILE_OPTIONS = ${MPI_CXX_COMPILE_OPTIONS}" ) Loading Documentation/Examples/Algorithms/CMakeLists.txt +15 −0 Original line number Diff line number Diff line IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE(ParallelForExampleCuda ParallelForExample.cu) ADD_CUSTOM_COMMAND( COMMAND ParallelForExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out ) CUDA_ADD_EXECUTABLE( SortingExampleCuda SortingExample.cu) ADD_CUSTOM_COMMAND( COMMAND SortingExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out ) CUDA_ADD_EXECUTABLE( SortingExample2Cuda SortingExample2.cu) ADD_CUSTOM_COMMAND( COMMAND SortingExample2Cuda > 
${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out ) CUDA_ADD_EXECUTABLE( SortingExample3Cuda SortingExample3.cu) ADD_CUSTOM_COMMAND( COMMAND SortingExample3Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out ) ELSE() ADD_EXECUTABLE(ParallelForExample ParallelForExample.cpp) ADD_CUSTOM_COMMAND( COMMAND ParallelForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out ) ADD_EXECUTABLE( SortingExample SortingExample.cpp) ADD_CUSTOM_COMMAND( COMMAND SortingExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out ) ADD_EXECUTABLE( SortingExample2 SortingExample2.cpp) ADD_CUSTOM_COMMAND( COMMAND SortingExample2 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out ) ADD_EXECUTABLE( SortingExample3 SortingExample3.cpp) ADD_CUSTOM_COMMAND( COMMAND SortingExample3 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out ) ENDIF() ADD_EXECUTABLE(staticForExample staticForExample.cpp) Loading @@ -13,6 +25,9 @@ ADD_EXECUTABLE(unrolledForExample unrolledForExample.cpp) ADD_CUSTOM_COMMAND( COMMAND unrolledForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/unrolledForExample.out OUTPUT unrolledForExample.out ) ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS SortingExample.out SortingExample2.out SortingExample3.out ParallelForExample.out unrolledForExample.out staticForExample.out Loading Documentation/Examples/Algorithms/SortingExample.cpp 0 → 100644 +55 −0 Original line number Diff line number Diff line #include <iostream> #include <TNL/Containers/Array.h> #include <TNL/Algorithms/sort.h> using namespace TNL; using namespace TNL::Containers; using namespace TNL::Algorithms; template< typename ArrayT > void sort( ArrayT& array ) { const int size = 10; /**** * Fill the array with random integers. 
*/ Array< int > aux_array( size ); srand( size + 2021 ); aux_array.forAllElements( [=] __cuda_callable__ ( int i, int& value ) { value = std::rand() % (2*size); } ); array = aux_array; std::cout << "Random array: " << array << std::endl; /**** * Sort the array in ascending order. */ ascendingSort( array ); std::cout << "Array sorted in ascending order:" << array << std::endl; /*** * Sort the array in descending order. */ descendingSort( array ); std::cout << "Array sorted in descending order:" << array << std::endl; } int main( int argc, char* argv[] ) { /*** * Firstly, test the sorting on CPU. */ std::cout << "Sorting on CPU ... " << std::endl; Array< int, Devices::Host > host_array; sort( host_array ); #ifdef HAVE_CUDA /*** * And then also on GPU. */ std::cout << "Sorting on GPU ... " << std::endl; Array< int, Devices::Cuda > cuda_array; sort( cuda_array ); #endif return EXIT_SUCCESS; } Documentation/Examples/Algorithms/SortingExample.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line SortingExample.cpp No newline at end of file Documentation/Examples/Algorithms/SortingExample2.cpp 0 → 100644 +55 −0 Original line number Diff line number Diff line #include <iostream> #include <TNL/Containers/Array.h> #include <TNL/Algorithms/sort.h> using namespace TNL; using namespace TNL::Containers; using namespace TNL::Algorithms; template< typename ArrayT > void sort( ArrayT& array ) { const int size = 10; /**** * Fill the array with random integers. */ Array< int > aux_array( size ); srand( size + 2021 ); aux_array.forAllElements( [=] __cuda_callable__ ( int i, int& value ) { value = std::rand() % (2*size); } ); array = aux_array; std::cout << "Random array: " << array << std::endl; /**** * Sort the array in ascending order. */ sort( array, [] __cuda_callable__ ( int a, int b ) { return a < b; } ); std::cout << "Array sorted in ascending order:" << array << std::endl; /*** * Sort the array in descending order. 
*/ sort( array, [] __cuda_callable__ ( int a, int b ) { return a > b; } ); std::cout << "Array sorted in descending order:" << array << std::endl; } int main( int argc, char* argv[] ) { /*** * Firstly, test the sorting on CPU. */ std::cout << "Sorting on CPU ... " << std::endl; Array< int, Devices::Host > host_array; sort( host_array ); #ifdef HAVE_CUDA /*** * And then also on GPU. */ std::cout << "Sorting on GPU ... " << std::endl; Array< int, Devices::Cuda > cuda_array; sort( cuda_array ); #endif return EXIT_SUCCESS; } Loading
# CMakeLists.txt (+5 −0)
# The three sections below are excerpts (diff hunks) of the top-level CMakeLists.txt.
# Lines of the file that lie between the excerpts are not shown.

# ---- excerpt 1 of 3 (@@ -32,6 +32,7 @@): user-facing build options ----
option(BUILD_TESTS "Build tests" OFF)
option(BUILD_MATRIX_TESTS "Build tests for matrix formats" OFF)
option(BUILD_PYTHON "Compile the Python bindings" OFF)
option(BUILD_DOC "Build examples included in the documentation" OFF)
# Directory with the CUDA Samples; the sentinel value "none" means "not provided".
set(CUDA_SAMPLES_PATH "none" CACHE STRING "Path to CUDA Samples - it is used only for some benchmarking.")

# install paths relative to the cmake's prefix
set( TNL_TARGET_INCLUDE_DIRECTORY "include/TNL" )

# ---- excerpt 2 of 3 (@@ -253,6 +254,9 @@): tail of the CUDA configuration ----
if( ${WITH_CUDA} )
   # ... the body of this block is outside the excerpt; the next two endif()
   # commands close blocks that are opened in the part not shown ...
      endif()
      set( CMAKE_EXECUTABLE_SUFFIX "${executable_suffix_backup}" )
   endif()

   # The condition used to test CUDA_SAMPLES_DIR, which this file never declares.
   # An undefined name is compared as a literal string, so the test was always
   # true and the flags became "-I -DHAVE_CUDA_SAMPLES". Accept an explicitly
   # passed CUDA_SAMPLES_DIR as an alias so that existing command lines keep working.
   if( DEFINED CUDA_SAMPLES_DIR AND "${CUDA_SAMPLES_PATH}" STREQUAL "none" )
      set( CUDA_SAMPLES_PATH "${CUDA_SAMPLES_DIR}" )
   endif()
   # Produce the flags only for a real path: neither the sentinel nor an empty string.
   if( NOT "${CUDA_SAMPLES_PATH}" STREQUAL "none" AND NOT "${CUDA_SAMPLES_PATH}" STREQUAL "" )
      set( CUDA_SAMPLES_FLAGS "-I${CUDA_SAMPLES_PATH} -DHAVE_CUDA_SAMPLES")
   endif()
endif()

# ---- excerpt 3 of 3 (@@ -407,6 +411,7 @@): configuration summary ----
message( " CMAKE_SHARED_LINKER_FLAGS = ${CMAKE_SHARED_LINKER_FLAGS}" )
message( " CMAKE_SHARED_LINKER_FLAGS_DEBUG = ${CMAKE_SHARED_LINKER_FLAGS_DEBUG}" )
message( " CMAKE_SHARED_LINKER_FLAGS_RELEASE = ${CMAKE_SHARED_LINKER_FLAGS_RELEASE}" )
message( " CUDA_NVCC_FLAGS = ${CUDA_NVCC_FLAGS}" )
message( " CUDA_SAMPLES_FLAGS = ${CUDA_SAMPLES_FLAGS}" )
message( " GMP_LIBRARIES = ${GMP_LIBRARIES}" )
if( MPI_CXX_FOUND AND ${WITH_MPI} )
   message( " MPI_CXX_COMPILE_OPTIONS = ${MPI_CXX_COMPILE_OPTIONS}" )
# (the remainder of this if() block lies beyond the excerpt)
# Documentation/Examples/Algorithms/CMakeLists.txt (+15 −0)
# The two sections below are excerpts (diff hunks) of this file; the lines between
# them are not shown.

# ---- excerpt 1 of 2 (beginning of the file) ----
# Examples that come in a CUDA (.cu) and a host (.cpp) flavour. Depending on
# BUILD_CUDA one of the two is built, and a custom command runs the executable
# with its standard output redirected into the documentation snippets directory.
foreach( tnl_algorithms_example
         ParallelForExample
         SortingExample
         SortingExample2
         SortingExample3 )
   if( BUILD_CUDA )
      cuda_add_executable( ${tnl_algorithms_example}Cuda ${tnl_algorithms_example}.cu )
      add_custom_command(
         COMMAND ${tnl_algorithms_example}Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${tnl_algorithms_example}.out
         OUTPUT ${tnl_algorithms_example}.out )
   else()
      add_executable( ${tnl_algorithms_example} ${tnl_algorithms_example}.cpp )
      add_custom_command(
         COMMAND ${tnl_algorithms_example} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${tnl_algorithms_example}.out
         OUTPUT ${tnl_algorithms_example}.out )
   endif()
endforeach()

# Host-only example.
add_executable( staticForExample staticForExample.cpp )

# ---- excerpt 2 of 2 (@@ -13,6 +25,9 @@) ----
add_executable( unrolledForExample unrolledForExample.cpp )
add_custom_command(
   COMMAND unrolledForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/unrolledForExample.out
   OUTPUT unrolledForExample.out )

# Part of the default build (ALL); depends on the .out files declared as OUTPUT
# of the custom commands.
add_custom_target( RunAlgorithmsExamples ALL DEPENDS
   SortingExample.out
   SortingExample2.out
   SortingExample3.out
   ParallelForExample.out
   unrolledForExample.out
   staticForExample.out
   # (the rest of this command lies beyond the excerpt)
Documentation/Examples/Algorithms/SortingExample.cpp 0 → 100644 +55 −0 Original line number Diff line number Diff line #include <iostream> #include <TNL/Containers/Array.h> #include <TNL/Algorithms/sort.h> using namespace TNL; using namespace TNL::Containers; using namespace TNL::Algorithms; template< typename ArrayT > void sort( ArrayT& array ) { const int size = 10; /**** * Fill the array with random integers. */ Array< int > aux_array( size ); srand( size + 2021 ); aux_array.forAllElements( [=] __cuda_callable__ ( int i, int& value ) { value = std::rand() % (2*size); } ); array = aux_array; std::cout << "Random array: " << array << std::endl; /**** * Sort the array in ascending order. */ ascendingSort( array ); std::cout << "Array sorted in ascending order:" << array << std::endl; /*** * Sort the array in descending order. */ descendingSort( array ); std::cout << "Array sorted in descending order:" << array << std::endl; } int main( int argc, char* argv[] ) { /*** * Firstly, test the sorting on CPU. */ std::cout << "Sorting on CPU ... " << std::endl; Array< int, Devices::Host > host_array; sort( host_array ); #ifdef HAVE_CUDA /*** * And then also on GPU. */ std::cout << "Sorting on GPU ... " << std::endl; Array< int, Devices::Cuda > cuda_array; sort( cuda_array ); #endif return EXIT_SUCCESS; }
Documentation/Examples/Algorithms/SortingExample.cu (new file, mode 120000, i.e. a symbolic link; +1 −0): its content is the link target SortingExample.cpp, stored with no newline at end of file.
Documentation/Examples/Algorithms/SortingExample2.cpp 0 → 100644 +55 −0 Original line number Diff line number Diff line #include <iostream> #include <TNL/Containers/Array.h> #include <TNL/Algorithms/sort.h> using namespace TNL; using namespace TNL::Containers; using namespace TNL::Algorithms; template< typename ArrayT > void sort( ArrayT& array ) { const int size = 10; /**** * Fill the array with random integers. */ Array< int > aux_array( size ); srand( size + 2021 ); aux_array.forAllElements( [=] __cuda_callable__ ( int i, int& value ) { value = std::rand() % (2*size); } ); array = aux_array; std::cout << "Random array: " << array << std::endl; /**** * Sort the array in ascending order. */ sort( array, [] __cuda_callable__ ( int a, int b ) { return a < b; } ); std::cout << "Array sorted in ascending order:" << array << std::endl; /*** * Sort the array in descending order. */ sort( array, [] __cuda_callable__ ( int a, int b ) { return a > b; } ); std::cout << "Array sorted in descending order:" << array << std::endl; } int main( int argc, char* argv[] ) { /*** * Firstly, test the sorting on CPU. */ std::cout << "Sorting on CPU ... " << std::endl; Array< int, Devices::Host > host_array; sort( host_array ); #ifdef HAVE_CUDA /*** * And then also on GPU. */ std::cout << "Sorting on GPU ... " << std::endl; Array< int, Devices::Cuda > cuda_array; sort( cuda_array ); #endif return EXIT_SUCCESS; }