Loading Documentation/Examples/CMakeLists.txt +4 −2 Original line number Diff line number Diff line Loading @@ -3,7 +3,9 @@ ADD_SUBDIRECTORY( Containers ) ADD_SUBDIRECTORY( Pointers ) ADD_SUBDIRECTORY( Matrices ) set( COMMON_EXAMPLES set( COMMON_EXAMPLES ) set( CUDA_EXAMPLES FileExampleCuda ) Loading @@ -24,7 +26,7 @@ set( HOST_EXAMPLES TimerExampleLogger ) if( BUILD_CUDA ) foreach( target IN ITEMS ${COMMON_EXAMPLES} ) foreach( target IN ITEMS ${COMMON_EXAMPLES} ${CUDA_EXAMPLES} ) cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS ) add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out ) set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out ) Loading Documentation/Tutorials/Pointers/CMakeLists.txt +4 −4 Original line number Diff line number Diff line IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( UniquePointerExample UniquePointerExample.cu ) ADD_CUSTOM_COMMAND( COMMAND UniquePointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UniquePointerExample.out OUTPUT UniquePointerExample.out ) CUDA_ADD_EXECUTABLE( UniquePointerExample_ UniquePointerExample.cu ) ADD_CUSTOM_COMMAND( COMMAND UniquePointerExample_ > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UniquePointerExample.out OUTPUT UniquePointerExample.out ) CUDA_ADD_EXECUTABLE( SharedPointerExample SharedPointerExample.cu ) ADD_CUSTOM_COMMAND( COMMAND SharedPointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SharedPointerExample.out OUTPUT SharedPointerExample.out ) CUDA_ADD_EXECUTABLE( DevicePointerExample DevicePointerExample.cu ) ADD_CUSTOM_COMMAND( COMMAND DevicePointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DevicePointerExample.out OUTPUT DevicePointerExample.out ) ELSE() ADD_EXECUTABLE( UniquePointerExample UniquePointerExample.cpp ) ADD_CUSTOM_COMMAND( COMMAND UniquePointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UniquePointerExample.out OUTPUT UniquePointerExample.out ) ADD_EXECUTABLE( UniquePointerExample_ UniquePointerExample.cpp ) ADD_CUSTOM_COMMAND( COMMAND UniquePointerExample_ > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UniquePointerExample.out OUTPUT UniquePointerExample.out ) ENDIF() ADD_EXECUTABLE( UniquePointerHostExample UniquePointerHostExample.cpp ) Loading src/Python/pytnl/tnl/SparseMatrix.h +1 −1 Original line number Diff line number Diff line Loading @@ -67,7 +67,7 @@ struct export_CSR< Segments, typename TNL::enable_if_type< decltype(Segments{}.g static void e( Scope & s ) { s .def("getOffsets", []( const Segments& segments ) -> const typename Segments::OffsetsHolder& { .def("getOffsets", []( const Segments& segments ) -> const typename Segments::OffsetsContainer& { return segments.getOffsets(); }, py::return_value_policy::reference_internal) ; Loading src/TNL/Algorithms/Segments/BiEllpackView.h +2 −2 Original line number Diff line number Diff line Loading @@ -15,7 +15,7 @@ #include <TNL/Containers/Vector.h> #include <TNL/Algorithms/Segments/ElementsOrganization.h> #include <TNL/Algorithms/Segments/BiEllpackSegmentView.h> #include <TNL/Algorithms/Segments/details/BiEllpack.h> #include <TNL/Algorithms/Segments/detail/BiEllpack.h> #include <TNL/Algorithms/Segments/SegmentsPrinting.h> namespace TNL { Loading Loading @@ -205,7 +205,7 @@ class BiEllpackView Real_ zero ); template< typename Index_, typename Fetch_, int BlockDim_, int WarpSize_, bool B_ > friend struct details::BiEllpackreduceSegmentsDispatcher; friend struct detail::BiEllpackreduceSegmentsDispatcher; #endif }; Loading src/TNL/Algorithms/Segments/BiEllpackView.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -425,7 +425,7 @@ reduceSegments( IndexType first, IndexType last, Fetch& fetch, const Reduction& dim3 cudaGridSize = Cuda::getMaxGridSize(); if( gridIdx == cudaGrids - 1 ) cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); details::BiEllpackreduceSegmentsKernel< ViewType, IndexType, Fetch, Reduction, ResultKeeper, Real, BlockDim > detail::BiEllpackreduceSegmentsKernel< ViewType, IndexType, Fetch, Reduction, ResultKeeper, Real, BlockDim > <<< cudaGridSize, cudaBlockSize, sharedMemory >>> ( *this, gridIdx, first, last, fetch, reduction, keeper, zero ); cudaThreadSynchronize(); Loading Loading
Documentation/Examples/CMakeLists.txt +4 −2 Original line number Diff line number Diff line Loading @@ -3,7 +3,9 @@ ADD_SUBDIRECTORY( Containers ) ADD_SUBDIRECTORY( Pointers ) ADD_SUBDIRECTORY( Matrices ) set( COMMON_EXAMPLES set( COMMON_EXAMPLES ) set( CUDA_EXAMPLES FileExampleCuda ) Loading @@ -24,7 +26,7 @@ set( HOST_EXAMPLES TimerExampleLogger ) if( BUILD_CUDA ) foreach( target IN ITEMS ${COMMON_EXAMPLES} ) foreach( target IN ITEMS ${COMMON_EXAMPLES} ${CUDA_EXAMPLES} ) cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS ) add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out ) set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out ) Loading
Documentation/Tutorials/Pointers/CMakeLists.txt +4 −4 Original line number Diff line number Diff line IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( UniquePointerExample UniquePointerExample.cu ) ADD_CUSTOM_COMMAND( COMMAND UniquePointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UniquePointerExample.out OUTPUT UniquePointerExample.out ) CUDA_ADD_EXECUTABLE( UniquePointerExample_ UniquePointerExample.cu ) ADD_CUSTOM_COMMAND( COMMAND UniquePointerExample_ > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UniquePointerExample.out OUTPUT UniquePointerExample.out ) CUDA_ADD_EXECUTABLE( SharedPointerExample SharedPointerExample.cu ) ADD_CUSTOM_COMMAND( COMMAND SharedPointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SharedPointerExample.out OUTPUT SharedPointerExample.out ) CUDA_ADD_EXECUTABLE( DevicePointerExample DevicePointerExample.cu ) ADD_CUSTOM_COMMAND( COMMAND DevicePointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DevicePointerExample.out OUTPUT DevicePointerExample.out ) ELSE() ADD_EXECUTABLE( UniquePointerExample UniquePointerExample.cpp ) ADD_CUSTOM_COMMAND( COMMAND UniquePointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UniquePointerExample.out OUTPUT UniquePointerExample.out ) ADD_EXECUTABLE( UniquePointerExample_ UniquePointerExample.cpp ) ADD_CUSTOM_COMMAND( COMMAND UniquePointerExample_ > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UniquePointerExample.out OUTPUT UniquePointerExample.out ) ENDIF() ADD_EXECUTABLE( UniquePointerHostExample UniquePointerHostExample.cpp ) Loading
src/Python/pytnl/tnl/SparseMatrix.h +1 −1 Original line number Diff line number Diff line Loading @@ -67,7 +67,7 @@ struct export_CSR< Segments, typename TNL::enable_if_type< decltype(Segments{}.g static void e( Scope & s ) { s .def("getOffsets", []( const Segments& segments ) -> const typename Segments::OffsetsHolder& { .def("getOffsets", []( const Segments& segments ) -> const typename Segments::OffsetsContainer& { return segments.getOffsets(); }, py::return_value_policy::reference_internal) ; Loading
src/TNL/Algorithms/Segments/BiEllpackView.h +2 −2 Original line number Diff line number Diff line Loading @@ -15,7 +15,7 @@ #include <TNL/Containers/Vector.h> #include <TNL/Algorithms/Segments/ElementsOrganization.h> #include <TNL/Algorithms/Segments/BiEllpackSegmentView.h> #include <TNL/Algorithms/Segments/details/BiEllpack.h> #include <TNL/Algorithms/Segments/detail/BiEllpack.h> #include <TNL/Algorithms/Segments/SegmentsPrinting.h> namespace TNL { Loading Loading @@ -205,7 +205,7 @@ class BiEllpackView Real_ zero ); template< typename Index_, typename Fetch_, int BlockDim_, int WarpSize_, bool B_ > friend struct details::BiEllpackreduceSegmentsDispatcher; friend struct detail::BiEllpackreduceSegmentsDispatcher; #endif }; Loading
src/TNL/Algorithms/Segments/BiEllpackView.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -425,7 +425,7 @@ reduceSegments( IndexType first, IndexType last, Fetch& fetch, const Reduction& dim3 cudaGridSize = Cuda::getMaxGridSize(); if( gridIdx == cudaGrids - 1 ) cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); details::BiEllpackreduceSegmentsKernel< ViewType, IndexType, Fetch, Reduction, ResultKeeper, Real, BlockDim > detail::BiEllpackreduceSegmentsKernel< ViewType, IndexType, Fetch, Reduction, ResultKeeper, Real, BlockDim > <<< cudaGridSize, cudaBlockSize, sharedMemory >>> ( *this, gridIdx, first, last, fetch, reduction, keeper, zero ); cudaThreadSynchronize(); Loading