Loading CMakeLists.txt +31 −24 Original line number Diff line number Diff line Loading @@ -309,30 +309,34 @@ if( ${WITH_GMP} ) endif() endif() #if( BUILD_MPI ) # FIND_PATH( PETSC_INCLUDE_DIR petsc.h # /usr/include/petsc # ${PETSC_DIR}/${PETSC_ARCH}/include # ${PETSC_DIR}/include # DOC "PETSC headers." # ) # if( ${PETSC_INCLUDE_DIR} STREQUAL "PETSC_INCLUDE_DIR-NOTFOUND" ) # message( "PETSC not found." ) # else() # message( "PETSC headers found -- ${PETSC_INCLUDE_DIR}" ) # FIND_LIBRARY(PETSC_LIBRARY petsc # ${PETSC_INCLUDE_DIR}/../lib # /usr/local/lib # /usr/lib) # if( PETSC_LIBRARY ) # #string( REPLACE ";" " " MPI_LIBRARIES "${MPI_CXX_LIBRARIES}" ) # #set( PETSC_LIBRARY "${MPI_LIBRARIES} ${PETSC_LIBRARY}") # message( "PETSC library found -- ${PETSC_LIBRARY}") #### # Test for PETSc if( BUILD_MPI ) FIND_PATH( PETSC_INCLUDE_DIR petsc.h /usr/include/petsc ${PETSC_DIR}/${PETSC_ARCH}/include ${PETSC_DIR}/include DOC "PETSC headers." ) if( ${PETSC_INCLUDE_DIR} STREQUAL "PETSC_INCLUDE_DIR-NOTFOUND" ) message( "PETSC not found." 
) else() message( "PETSC headers found -- ${PETSC_INCLUDE_DIR}" ) FIND_LIBRARY(PETSC_LIBRARY petsc ${PETSC_INCLUDE_DIR}/../lib /usr/local/lib /usr/lib) if( PETSC_LIBRARY ) #string( REPLACE ";" " " MPI_LIBRARIES "${MPI_CXX_LIBRARIES}" ) #set( PETSC_LIBRARY "${MPI_LIBRARIES} ${PETSC_LIBRARY}") message( "PETSC library found -- ${PETSC_LIBRARY}") #list( GET MPI_CXX_INCLUDE_PATH 0 MPI_CXX_PATH ) #set(PETSC_CXX_FLAGS "-DHAVE_PETSC -I${PETSC_INCLUDE_DIR} -DHAVE_MPI -I${MPI_CXX_PATH}") # endif() # endif() #endif() set(PETSC_CXX_FLAGS -DHAVE_PETSC -I${PETSC_INCLUDE_DIR}) set(PETSC_LINKER_FLAGS ${PETSC_LIBRARY}) endif() endif() endif() # configure build paths set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin ) Loading Loading @@ -413,6 +417,9 @@ message( " CMAKE_SHARED_LINKER_FLAGS_RELEASE = ${CMAKE_SHARED_LINKER_FLAGS_REL message( " CUDA_NVCC_FLAGS = ${CUDA_NVCC_FLAGS}" ) message( " CUDA_SAMPLES_FLAGS = ${CUDA_SAMPLES_FLAGS}" ) message( " GMP_LIBRARIES = ${GMP_LIBRARIES}" ) message( " PETSC_CXX_FLAGS = ${PETSC_CXX_FLAGS}" ) message( " PETSC_LINKER_FLAGS = ${PETSC_LINKER_FLAGS}" ) if( MPI_CXX_FOUND AND ${WITH_MPI} ) message( " MPI_CXX_COMPILE_OPTIONS = ${MPI_CXX_COMPILE_OPTIONS}" ) message( " MPI_CXX_COMPILE_DEFINITIONS = ${MPI_CXX_COMPILE_DEFINITIONS}" ) Loading Documentation/Examples/Algorithms/CMakeLists.txt +30 −51 Original line number Diff line number Diff line IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( SortingExampleCuda SortingExample.cu) ADD_CUSTOM_COMMAND( COMMAND SortingExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out ) CUDA_ADD_EXECUTABLE( SortingExample2Cuda SortingExample2.cu) ADD_CUSTOM_COMMAND( COMMAND SortingExample2Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out ) CUDA_ADD_EXECUTABLE( SortingExample3Cuda SortingExample3.cu) ADD_CUSTOM_COMMAND( COMMAND SortingExample3Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT 
SortingExample3.out ) CUDA_ADD_EXECUTABLE(ParallelForExampleCuda ParallelForExample.cu) ADD_CUSTOM_COMMAND( COMMAND ParallelForExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out ) ADD_SUBDIRECTORY( Segments ) set( COMMON_EXAMPLES SortingExample SortingExample2 SortingExample3 ParallelForExample SequentialForExample ) CUDA_ADD_EXECUTABLE(reduceArrayExampleCuda reduceArrayExample.cu) ADD_CUSTOM_COMMAND( COMMAND reduceArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out OUTPUT reduceArrayExample.out ) set( HOST_EXAMPLES staticForExample unrolledForExample ) if( BUILD_CUDA ) foreach( target IN ITEMS ${COMMON_EXAMPLES} ) cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS ) add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out ) set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out ) endforeach() else() foreach( target IN ITEMS ${COMMON_EXAMPLES} ${HOST_EXAMPLES}) add_executable( ${target} ${target}.cpp ) add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out ) set( HOST_OUTPUTS ${HOST_OUTPUTS} ${target}.out ) endforeach() endif() CUDA_ADD_EXECUTABLE(reduceWithArgumentArrayExampleCuda reduceWithArgumentArrayExample.cu) ADD_CUSTOM_COMMAND( COMMAND reduceWithArgumentArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceWithArgumentArrayExample.out OUTPUT reduceWithArgumentArrayExample.out ) IF( BUILD_CUDA ) ADD_CUSTOM_TARGET( RunAlgorithmsExamples-cuda ALL DEPENDS ${CUDA_OUTPUTS} ) ELSE() ADD_EXECUTABLE( SortingExample SortingExample.cpp) ADD_CUSTOM_COMMAND( COMMAND SortingExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out ) ADD_EXECUTABLE( SortingExample2 SortingExample2.cpp) ADD_CUSTOM_COMMAND( COMMAND SortingExample2 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT 
SortingExample2.out ) ADD_EXECUTABLE( SortingExample3 SortingExample3.cpp) ADD_CUSTOM_COMMAND( COMMAND SortingExample3 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out ) ADD_EXECUTABLE(ParallelForExample ParallelForExample.cpp) ADD_CUSTOM_COMMAND( COMMAND ParallelForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out ) ADD_EXECUTABLE(reduceArrayExample reduceArrayExample.cpp) ADD_CUSTOM_COMMAND( COMMAND reduceArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out OUTPUT reduceArrayExample.out ) ADD_EXECUTABLE(reduceWithArgumentArrayExample reduceWithArgumentArrayExample.cpp) ADD_CUSTOM_COMMAND( COMMAND reduceWithArgumentArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceWithArgumentArrayExample.out OUTPUT reduceWithArgumentArrayExample.out ) ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS ${HOST_OUTPUTS} ) ENDIF() No newline at end of file ADD_EXECUTABLE(staticForExample staticForExample.cpp) ADD_CUSTOM_COMMAND( COMMAND staticForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/staticForExample.out OUTPUT staticForExample.out ) ADD_EXECUTABLE(unrolledForExample unrolledForExample.cpp) ADD_CUSTOM_COMMAND( COMMAND unrolledForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/unrolledForExample.out OUTPUT unrolledForExample.out ) ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS SortingExample.out SortingExample2.out SortingExample3.out ParallelForExample.out reduceArrayExample.out reduceWithArgumentArrayExample.out unrolledForExample.out staticForExample.out ) Documentation/Examples/Algorithms/Segments/CMakeLists.txt 0 → 100644 +39 −0 Original line number Diff line number Diff line set( COMMON_EXAMPLES SegmentsExample_General SegmentsPrintingExample-1 SegmentsPrintingExample-2 SegmentsExample_CSR_constructor_1 SegmentsExample_CSR_constructor_2 SegmentsExample_CSR_getSerializationType SegmentsExample_CSR_getSegmentsType 
SegmentsExample_CSR_setSegmentsSizes SegmentsExample_CSR_getSegmentView SegmentsExample_CSR_forElements SegmentsExample_CSR_forSegments SegmentsExample_CSR_sequentialForSegments SegmentsExample_CSR_reduceSegments SegmentsExample_forElements SegmentsExample_forSegments-1 SegmentsExample_forSegments-2 SegmentsExample_reduceSegments ) if( BUILD_CUDA ) foreach( target IN ITEMS ${COMMON_EXAMPLES} ) cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS ) add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out ) set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out ) endforeach() else() foreach( target IN ITEMS ${COMMON_EXAMPLES} ) add_executable( ${target} ${target}.cpp ) add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out ) set( HOST_OUTPUTS ${HOST_OUTPUTS} ${target}.out ) endforeach() endif() IF( BUILD_CUDA ) ADD_CUSTOM_TARGET( RunSegmentsExamples-cuda ALL DEPENDS ${CUDA_OUTPUTS} ) ELSE() ADD_CUSTOM_TARGET( RunSegmentsExamples ALL DEPENDS ${HOST_OUTPUTS} ) ENDIF() No newline at end of file Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_constructor_1.cpp 0 → 100644 +51 −0 Original line number Diff line number Diff line #include <iostream> #include <functional> #include <TNL/Containers/Vector.h> #include <TNL/Algorithms/Segments/CSR.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> template< typename Device > void SegmentsExample() { using SegmentsType = typename TNL::Algorithms::Segments::CSR< Device, int >; /*** * Create segments with given segments sizes. */ TNL::Containers::Vector< int, Device > segmentsSizes{ 1, 2, 3, 4, 5 }; SegmentsType segments( segmentsSizes ); std::cout << "Segments sizes are: " << segments << std::endl; /*** * Allocate array for the segments; */ TNL::Containers::Array< double, Device > data( segments.getStorageSize(), 0.0 ); /*** * Insert data into particular segments. 
*/ auto data_view = data.getView(); segments.forAllElements( [=] __cuda_callable__ ( int segmentIdx, int localIdx, int globalIdx ) mutable { if( localIdx <= segmentIdx ) data_view[ globalIdx ] = segmentIdx; } ); /*** * Print the data managed by the segments. */ auto fetch = [=] __cuda_callable__ ( int globalIdx ) -> double { return data_view[ globalIdx ]; }; printSegments( segments, fetch, std::cout ); } int main( int argc, char* argv[] ) { std::cout << "Example of CSR segments on host: " << std::endl; SegmentsExample< TNL::Devices::Host >(); #ifdef HAVE_CUDA std::cout << "Example of CSR segments on CUDA GPU: " << std::endl; SegmentsExample< TNL::Devices::Cuda >(); #endif return EXIT_SUCCESS; } Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_constructor_1.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line SegmentsExample_CSR_constructor_1.cpp No newline at end of file Loading
CMakeLists.txt +31 −24 Original line number Diff line number Diff line Loading @@ -309,30 +309,34 @@ if( ${WITH_GMP} ) endif() endif() #if( BUILD_MPI ) # FIND_PATH( PETSC_INCLUDE_DIR petsc.h # /usr/include/petsc # ${PETSC_DIR}/${PETSC_ARCH}/include # ${PETSC_DIR}/include # DOC "PETSC headers." # ) # if( ${PETSC_INCLUDE_DIR} STREQUAL "PETSC_INCLUDE_DIR-NOTFOUND" ) # message( "PETSC not found." ) # else() # message( "PETSC headers found -- ${PETSC_INCLUDE_DIR}" ) # FIND_LIBRARY(PETSC_LIBRARY petsc # ${PETSC_INCLUDE_DIR}/../lib # /usr/local/lib # /usr/lib) # if( PETSC_LIBRARY ) # #string( REPLACE ";" " " MPI_LIBRARIES "${MPI_CXX_LIBRARIES}" ) # #set( PETSC_LIBRARY "${MPI_LIBRARIES} ${PETSC_LIBRARY}") # message( "PETSC library found -- ${PETSC_LIBRARY}") #### # Test for PETSc if( BUILD_MPI ) FIND_PATH( PETSC_INCLUDE_DIR petsc.h /usr/include/petsc ${PETSC_DIR}/${PETSC_ARCH}/include ${PETSC_DIR}/include DOC "PETSC headers." ) if( ${PETSC_INCLUDE_DIR} STREQUAL "PETSC_INCLUDE_DIR-NOTFOUND" ) message( "PETSC not found." 
) else() message( "PETSC headers found -- ${PETSC_INCLUDE_DIR}" ) FIND_LIBRARY(PETSC_LIBRARY petsc ${PETSC_INCLUDE_DIR}/../lib /usr/local/lib /usr/lib) if( PETSC_LIBRARY ) #string( REPLACE ";" " " MPI_LIBRARIES "${MPI_CXX_LIBRARIES}" ) #set( PETSC_LIBRARY "${MPI_LIBRARIES} ${PETSC_LIBRARY}") message( "PETSC library found -- ${PETSC_LIBRARY}") #list( GET MPI_CXX_INCLUDE_PATH 0 MPI_CXX_PATH ) #set(PETSC_CXX_FLAGS "-DHAVE_PETSC -I${PETSC_INCLUDE_DIR} -DHAVE_MPI -I${MPI_CXX_PATH}") # endif() # endif() #endif() set(PETSC_CXX_FLAGS -DHAVE_PETSC -I${PETSC_INCLUDE_DIR}) set(PETSC_LINKER_FLAGS ${PETSC_LIBRARY}) endif() endif() endif() # configure build paths set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin ) Loading Loading @@ -413,6 +417,9 @@ message( " CMAKE_SHARED_LINKER_FLAGS_RELEASE = ${CMAKE_SHARED_LINKER_FLAGS_REL message( " CUDA_NVCC_FLAGS = ${CUDA_NVCC_FLAGS}" ) message( " CUDA_SAMPLES_FLAGS = ${CUDA_SAMPLES_FLAGS}" ) message( " GMP_LIBRARIES = ${GMP_LIBRARIES}" ) message( " PETSC_CXX_FLAGS = ${PETSC_CXX_FLAGS}" ) message( " PETSC_LINKER_FLAGS = ${PETSC_LINKER_FLAGS}" ) if( MPI_CXX_FOUND AND ${WITH_MPI} ) message( " MPI_CXX_COMPILE_OPTIONS = ${MPI_CXX_COMPILE_OPTIONS}" ) message( " MPI_CXX_COMPILE_DEFINITIONS = ${MPI_CXX_COMPILE_DEFINITIONS}" ) Loading
Documentation/Examples/Algorithms/CMakeLists.txt +30 −51 Original line number Diff line number Diff line IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( SortingExampleCuda SortingExample.cu) ADD_CUSTOM_COMMAND( COMMAND SortingExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out ) CUDA_ADD_EXECUTABLE( SortingExample2Cuda SortingExample2.cu) ADD_CUSTOM_COMMAND( COMMAND SortingExample2Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out ) CUDA_ADD_EXECUTABLE( SortingExample3Cuda SortingExample3.cu) ADD_CUSTOM_COMMAND( COMMAND SortingExample3Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out ) CUDA_ADD_EXECUTABLE(ParallelForExampleCuda ParallelForExample.cu) ADD_CUSTOM_COMMAND( COMMAND ParallelForExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out ) ADD_SUBDIRECTORY( Segments ) set( COMMON_EXAMPLES SortingExample SortingExample2 SortingExample3 ParallelForExample SequentialForExample ) CUDA_ADD_EXECUTABLE(reduceArrayExampleCuda reduceArrayExample.cu) ADD_CUSTOM_COMMAND( COMMAND reduceArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out OUTPUT reduceArrayExample.out ) set( HOST_EXAMPLES staticForExample unrolledForExample ) if( BUILD_CUDA ) foreach( target IN ITEMS ${COMMON_EXAMPLES} ) cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS ) add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out ) set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out ) endforeach() else() foreach( target IN ITEMS ${COMMON_EXAMPLES} ${HOST_EXAMPLES}) add_executable( ${target} ${target}.cpp ) add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out ) set( HOST_OUTPUTS ${HOST_OUTPUTS} ${target}.out ) endforeach() endif() 
CUDA_ADD_EXECUTABLE(reduceWithArgumentArrayExampleCuda reduceWithArgumentArrayExample.cu) ADD_CUSTOM_COMMAND( COMMAND reduceWithArgumentArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceWithArgumentArrayExample.out OUTPUT reduceWithArgumentArrayExample.out ) IF( BUILD_CUDA ) ADD_CUSTOM_TARGET( RunAlgorithmsExamples-cuda ALL DEPENDS ${CUDA_OUTPUTS} ) ELSE() ADD_EXECUTABLE( SortingExample SortingExample.cpp) ADD_CUSTOM_COMMAND( COMMAND SortingExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out ) ADD_EXECUTABLE( SortingExample2 SortingExample2.cpp) ADD_CUSTOM_COMMAND( COMMAND SortingExample2 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out ) ADD_EXECUTABLE( SortingExample3 SortingExample3.cpp) ADD_CUSTOM_COMMAND( COMMAND SortingExample3 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out ) ADD_EXECUTABLE(ParallelForExample ParallelForExample.cpp) ADD_CUSTOM_COMMAND( COMMAND ParallelForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out ) ADD_EXECUTABLE(reduceArrayExample reduceArrayExample.cpp) ADD_CUSTOM_COMMAND( COMMAND reduceArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out OUTPUT reduceArrayExample.out ) ADD_EXECUTABLE(reduceWithArgumentArrayExample reduceWithArgumentArrayExample.cpp) ADD_CUSTOM_COMMAND( COMMAND reduceWithArgumentArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceWithArgumentArrayExample.out OUTPUT reduceWithArgumentArrayExample.out ) ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS ${HOST_OUTPUTS} ) ENDIF() No newline at end of file ADD_EXECUTABLE(staticForExample staticForExample.cpp) ADD_CUSTOM_COMMAND( COMMAND staticForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/staticForExample.out OUTPUT staticForExample.out ) ADD_EXECUTABLE(unrolledForExample unrolledForExample.cpp) ADD_CUSTOM_COMMAND( COMMAND 
unrolledForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/unrolledForExample.out OUTPUT unrolledForExample.out ) ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS SortingExample.out SortingExample2.out SortingExample3.out ParallelForExample.out reduceArrayExample.out reduceWithArgumentArrayExample.out unrolledForExample.out staticForExample.out )
# Documentation/Examples/Algorithms/Segments/CMakeLists.txt
#
# Builds every Segments documentation example and captures its standard output
# in ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/<example>.out.
#
# With BUILD_CUDA enabled the <example>.cu source is built into the executable
# <example>-cuda, otherwise <example>.cpp is built into <example>. Either way
# the captured output is stored under the same <example>.out name.

set( COMMON_EXAMPLES
   SegmentsExample_General
   SegmentsPrintingExample-1
   SegmentsPrintingExample-2
   SegmentsExample_CSR_constructor_1
   SegmentsExample_CSR_constructor_2
   SegmentsExample_CSR_getSerializationType
   SegmentsExample_CSR_getSegmentsType
   SegmentsExample_CSR_setSegmentsSizes
   SegmentsExample_CSR_getSegmentView
   SegmentsExample_CSR_forElements
   SegmentsExample_CSR_forSegments
   SegmentsExample_CSR_sequentialForSegments
   SegmentsExample_CSR_reduceSegments
   SegmentsExample_forElements
   SegmentsExample_forSegments-1
   SegmentsExample_forSegments-2
   SegmentsExample_reduceSegments
)

# Snippet files produced in this directory. The list is started empty on
# purpose: a directory scope inherits the variables of its parent, so appending
# to an uninitialised list could drag foreign entries into the DEPENDS below.
set( SEGMENTS_EXAMPLES_OUTPUTS "" )

foreach( target IN ITEMS ${COMMON_EXAMPLES} )
   if( BUILD_CUDA )
      set( example_executable ${target}-cuda )
      cuda_add_executable( ${example_executable} ${target}.cu OPTIONS )
   else()
      set( example_executable ${target} )
      add_executable( ${example_executable} ${target}.cpp )
   endif()

   # OUTPUT must name the file the command really writes. Declaring a different
   # path leaves the declared output missing forever, which makes the rule
   # permanently out of date and re-runs the example on every build.
   set( snippet_file "${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out" )
   add_custom_command(
      OUTPUT "${snippet_file}"
      COMMAND ${example_executable} > "${snippet_file}"
      # File-level dependency: regenerate the snippet when the example is rebuilt.
      DEPENDS ${example_executable} )
   list( APPEND SEGMENTS_EXAMPLES_OUTPUTS "${snippet_file}" )
endforeach()

# The target name tells the CUDA and the host flavour of the examples apart.
if( BUILD_CUDA )
   add_custom_target( RunSegmentsExamples-cuda ALL DEPENDS ${SEGMENTS_EXAMPLES_OUTPUTS} )
else()
   add_custom_target( RunSegmentsExamples ALL DEPENDS ${SEGMENTS_EXAMPLES_OUTPUTS} )
endif()
Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_constructor_1.cpp 0 → 100644 +51 −0 Original line number Diff line number Diff line #include <iostream> #include <functional> #include <TNL/Containers/Vector.h> #include <TNL/Algorithms/Segments/CSR.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> template< typename Device > void SegmentsExample() { using SegmentsType = typename TNL::Algorithms::Segments::CSR< Device, int >; /*** * Create segments with given segments sizes. */ TNL::Containers::Vector< int, Device > segmentsSizes{ 1, 2, 3, 4, 5 }; SegmentsType segments( segmentsSizes ); std::cout << "Segments sizes are: " << segments << std::endl; /*** * Allocate array for the segments; */ TNL::Containers::Array< double, Device > data( segments.getStorageSize(), 0.0 ); /*** * Insert data into particular segments. */ auto data_view = data.getView(); segments.forAllElements( [=] __cuda_callable__ ( int segmentIdx, int localIdx, int globalIdx ) mutable { if( localIdx <= segmentIdx ) data_view[ globalIdx ] = segmentIdx; } ); /*** * Print the data managed by the segments. */ auto fetch = [=] __cuda_callable__ ( int globalIdx ) -> double { return data_view[ globalIdx ]; }; printSegments( segments, fetch, std::cout ); } int main( int argc, char* argv[] ) { std::cout << "Example of CSR segments on host: " << std::endl; SegmentsExample< TNL::Devices::Host >(); #ifdef HAVE_CUDA std::cout << "Example of CSR segments on CUDA GPU: " << std::endl; SegmentsExample< TNL::Devices::Cuda >(); #endif return EXIT_SUCCESS; }
Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_constructor_1.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line SegmentsExample_CSR_constructor_1.cpp No newline at end of file