Loading Documentation/Examples/Algorithms/CMakeLists.txt +2 −0 Original line number Diff line number Diff line ADD_SUBDIRECTORY( Segments ) IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( SortingExampleCuda SortingExample.cu) ADD_CUSTOM_COMMAND( COMMAND SortingExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out ) Loading Documentation/Examples/Algorithms/Segments/.SegmentsExample_General.cpp.swp 0 → 100644 +12 KiB File added.No diff preview for this file type. View file Documentation/Examples/Algorithms/Segments/CMakeLists.txt 0 → 100644 +23 −0 Original line number Diff line number Diff line set( COMMON_EXAMPLES SegmentsExample_General ) if( BUILD_CUDA ) foreach( target IN ITEMS ${COMMON_EXAMPLES} ) cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS ) add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out ) set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out ) endforeach() else() foreach( target IN ITEMS ${COMMON_EXAMPLES} ) add_executable( ${target} ${target}.cpp ) add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out ) set( HOST_OUTPUTS ${HOST_OUTPUTS} ${target}.out ) endforeach() endif() IF( BUILD_CUDA ) ADD_CUSTOM_TARGET( RunSegmentsExamples-cuda ALL DEPENDS ${CUDA_OUTPUTS} ) ELSE() ADD_CUSTOM_TARGET( RunSegmentsExamples ALL DEPENDS ${HOST_OUTPUTS} ) ENDIF() No newline at end of file Documentation/Examples/Algorithms/Segments/SegmentsExample_General.cpp 0 → 100644 +79 −0 Original line number Diff line number Diff line #include <iostream> #include <functional> #include <TNL/Containers/Vector.h> #include <TNL/Algorithms/Segments/CSR.h> #include <TNL/Algorithms/Segments/Ellpack.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> template< typename Segments > void SegmentsExample() { using DeviceType = typename Segments::DeviceType; using IndexType = typename Segments::IndexType; /*** * 
Create segments with given segments sizes. */ Segments segments{ 1, 2, 3, 4, 5 }; std::cout << "Segments sizes are: " << segments << std::endl; /*** * Allocate array for the segments; */ TNL::Containers::Array< double, DeviceType > data( segments.getStorageSize() ); data = 0.0; /*** * Insert data into particular segments. */ auto data_view = data.getView(); segments.forAllElements( [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable { if( localIdx <= segmentIdx ) data_view[ globalIdx ] = segmentIdx; } ); /*** * Print the data managed by the segments. */ auto fetch = [=] __cuda_callable__ ( IndexType globalIdx ) -> double { return data_view[ globalIdx ]; }; printSegments( segments, fetch, std::cout ); /*** * Compute sums of elements in particular segments. */ TNL::Containers::Vector< double, DeviceType, IndexType > sums( segments.getSegmentsCount() ); auto sums_view = sums.getView(); auto sum_fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) -> double { return data_view[ globalIdx ]; }; auto keep = [=] __cuda_callable__ ( const IndexType& segmentIdx, const double& value ) mutable { sums_view[ segmentIdx ] = value; }; segments.reduceAllSegments( sum_fetch, std::plus<>{}, keep, 0.0 ); std::cout << "The sums are: " << sums << std::endl; } int main( int argc, char* argv[] ) { using HostCSR = TNL::Algorithms::Segments::CSR< TNL::Devices::Host, int >; using HostEllpack = TNL::Algorithms::Segments::Ellpack< TNL::Devices::Host, int >; using CudaCSR = TNL::Algorithms::Segments::CSR< TNL::Devices::Cuda, int >; using CudaEllpack = TNL::Algorithms::Segments::Ellpack< TNL::Devices::Cuda, int >; std::cout << "Example of CSR segments on host: " << std::endl; SegmentsExample< HostCSR >(); std::cout << "Example of Ellpack segments on host: " << std::endl; SegmentsExample< HostEllpack >(); #ifdef HAVE_CUDA std::cout << "Example of CSR segments on CUDA GPU: " << 
std::endl; SegmentsExample< CudaCSR >(); std::cout << "Example of Ellpack segments on CUDA GPU: " << std::endl; SegmentsExample< CudaEllpack >(); #endif return EXIT_SUCCESS; } Documentation/Examples/Algorithms/Segments/SegmentsExample_General.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line SegmentsExample_General.cpp No newline at end of file Loading
Documentation/Examples/Algorithms/CMakeLists.txt +2 −0 Original line number Diff line number Diff line ADD_SUBDIRECTORY( Segments ) IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( SortingExampleCuda SortingExample.cu) ADD_CUSTOM_COMMAND( COMMAND SortingExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out ) Loading
Documentation/Examples/Algorithms/Segments/.SegmentsExample_General.cpp.swp 0 → 100644 +12 KiB File added. No diff preview for this file type. View file
# Documentation/Examples/Algorithms/Segments/CMakeLists.txt
#
# Builds every example listed in COMMON_EXAMPLES and redirects its standard
# output to ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/<example>.out.
# With BUILD_CUDA the <example>.cu source is compiled into <example>-cuda,
# otherwise the <example>.cpp source is compiled into <example>.
set( COMMON_EXAMPLES
     SegmentsExample_General
)

if( BUILD_CUDA )
   foreach( example IN ITEMS ${COMMON_EXAMPLES} )
      cuda_add_executable( ${example}-cuda ${example}.cu OPTIONS )
      add_custom_command( COMMAND ${example}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${example}.out
                          OUTPUT ${example}.out )
      list( APPEND CUDA_OUTPUTS ${example}.out )
   endforeach()

   # Part of the default build (ALL); depends on every CUDA example output.
   add_custom_target( RunSegmentsExamples-cuda ALL DEPENDS ${CUDA_OUTPUTS} )
else()
   foreach( example IN ITEMS ${COMMON_EXAMPLES} )
      add_executable( ${example} ${example}.cpp )
      add_custom_command( COMMAND ${example} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${example}.out
                          OUTPUT ${example}.out )
      list( APPEND HOST_OUTPUTS ${example}.out )
   endforeach()

   # Part of the default build (ALL); depends on every host example output.
   add_custom_target( RunSegmentsExamples ALL DEPENDS ${HOST_OUTPUTS} )
endif()
Documentation/Examples/Algorithms/Segments/SegmentsExample_General.cpp 0 → 100644 +79 −0 Original line number Diff line number Diff line #include <iostream> #include <functional> #include <TNL/Containers/Vector.h> #include <TNL/Algorithms/Segments/CSR.h> #include <TNL/Algorithms/Segments/Ellpack.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> template< typename Segments > void SegmentsExample() { using DeviceType = typename Segments::DeviceType; using IndexType = typename Segments::IndexType; /*** * Create segments with given segments sizes. */ Segments segments{ 1, 2, 3, 4, 5 }; std::cout << "Segments sizes are: " << segments << std::endl; /*** * Allocate array for the segments; */ TNL::Containers::Array< double, DeviceType > data( segments.getStorageSize() ); data = 0.0; /*** * Insert data into particular segments. */ auto data_view = data.getView(); segments.forAllElements( [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable { if( localIdx <= segmentIdx ) data_view[ globalIdx ] = segmentIdx; } ); /*** * Print the data managed by the segments. */ auto fetch = [=] __cuda_callable__ ( IndexType globalIdx ) -> double { return data_view[ globalIdx ]; }; printSegments( segments, fetch, std::cout ); /*** * Compute sums of elements in particular segments. 
*/ TNL::Containers::Vector< double, DeviceType, IndexType > sums( segments.getSegmentsCount() ); auto sums_view = sums.getView(); auto sum_fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) -> double { return data_view[ globalIdx ]; }; auto keep = [=] __cuda_callable__ ( const IndexType& segmentIdx, const double& value ) mutable { sums_view[ segmentIdx ] = value; }; segments.reduceAllSegments( sum_fetch, std::plus<>{}, keep, 0.0 ); std::cout << "The sums are: " << sums << std::endl; } int main( int argc, char* argv[] ) { using HostCSR = TNL::Algorithms::Segments::CSR< TNL::Devices::Host, int >; using HostEllpack = TNL::Algorithms::Segments::Ellpack< TNL::Devices::Host, int >; using CudaCSR = TNL::Algorithms::Segments::CSR< TNL::Devices::Cuda, int >; using CudaEllpack = TNL::Algorithms::Segments::Ellpack< TNL::Devices::Cuda, int >; std::cout << "Example of CSR segments on host: " << std::endl; SegmentsExample< HostCSR >(); std::cout << "Example of Ellpack segments on host: " << std::endl; SegmentsExample< HostEllpack >(); #ifdef HAVE_CUDA std::cout << "Example of CSR segments on CUDA GPU: " << std::endl; SegmentsExample< CudaCSR >(); std::cout << "Example of Ellpack segments on CUDA GPU: " << std::endl; SegmentsExample< CudaEllpack >(); #endif return EXIT_SUCCESS; }
Documentation/Examples/Algorithms/Segments/SegmentsExample_General.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line SegmentsExample_General.cpp No newline at end of file