Loading Documentation/Examples/Matrices/CMakeLists.txt +11 −4 Original line number Diff line number Diff line Loading @@ -64,6 +64,11 @@ IF( BUILD_CUDA ) ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forAllRows.out OUTPUT DenseMatrixExample_forAllRows.out ) CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_constructor_cuda DenseMatrixViewExample_constructor.cu ) ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_constructor_cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_constructor.out OUTPUT DenseMatrixViewExample_constructor.out ) ELSE() ADD_EXECUTABLE( DenseMatrixExample_Constructor_init_list DenseMatrixExample_Constructor_init_list.cpp ) ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_Constructor_init_list > Loading Loading @@ -130,11 +135,12 @@ ELSE() ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forAllRows.out OUTPUT DenseMatrixExample_forAllRows.out ) ENDIF() ADD_EXECUTABLE( DenseMatrixViewExample_constructor DenseMatrixViewExample_constructor.cpp ) ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_constructor > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_constructor.out OUTPUT DenseMatrixViewExample_constructor.out ) ENDIF() ADD_CUSTOM_TARGET( RunMatricesExamples ALL DEPENDS DenseMatrixExample_Constructor_init_list.out Loading @@ -150,5 +156,6 @@ ADD_CUSTOM_TARGET( RunMatricesExamples ALL DEPENDS DenseMatrixExample_allRowsReduction.out DenseMatrixExample_forRows.out DenseMatrixExample_forAllRows.out DenseMatrixViewExample_constructor.out ) Documentation/Examples/Matrices/DenseMatrixExample_setElement.cpp +17 −3 Original line number Diff line number Diff line #include <iostream> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Matrices/DenseMatrix.h> #include <TNL/Devices/Host.h> #include <TNL/Pointers/SharedPointer.h> #include <TNL/Pointers/SmartPointersRegister.h> template< typename Device > void setElements() { TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 ); TNL::Pointers::SharedPointer< TNL::Matrices::DenseMatrix< double, Device > > matrix( 5, 5 ); for( int i = 0; i < 5; i++ ) matrix.setElement( i, i, i ); matrix->setElement( i, i, i ); std::cout << "Matrix set from the host:" << std::endl; std::cout << *matrix << std::endl; auto f = [=] __cuda_callable__ ( int i ) mutable { matrix->setElement( i, i, -i ); }; TNL::Pointers::synchronizeSmartPointersOnDevice< Device >(); TNL::Algorithms::ParallelFor< Device >::exec( 0, 5, f ); std::cout << "Matrix set from its native device:" << std::endl; std::cout << *matrix << std::endl; std::cout << matrix << std::endl; } int main( int argc, char* argv[] ) Loading Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cpp 0 → 100644 +39 −0 Original line number Diff line number Diff line #include <iostream> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Matrices/DenseMatrix.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> template< typename Device > void createMatrixView() { TNL::Containers::Vector< double, Device > values { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 }; TNL::Matrices::DenseMatrixView< double, Device, int, TNL::Containers::Segments::RowMajorOrder > matrix( 5, 5, values.getView() ); /*** * We need a matrix view to pass the matrix to lambda function even on CUDA device. */ /*auto matrixView = matrix.getView(); auto f = [=] __cuda_callable__ ( int rowIdx ) mutable { auto row = matrixView.getRow( rowIdx ); row.setElement( rowIdx, 10* ( rowIdx + 1 ) ); }; TNL::Algorithms::ParallelFor< Device >::exec( 0, matrix.getRows(), f ); std::cout << matrix << std::endl;*/ } int main( int argc, char* argv[] ) { std::cout << "Creating matrix view on host: " << std::endl; createMatrixView< TNL::Devices::Host >(); #ifdef HAVE_CUDA std::cout << "Creating matrix view on CUDA device: " << std::endl; createMatrixView< TNL::Devices::Cuda >(); #endif } Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line DenseMatrixViewExample_constructor.cpp No newline at end of file src/TNL/Containers/Segments/BiEllpack.h +9 −9 Original line number Diff line number Diff line Loading @@ -22,7 +22,7 @@ namespace TNL { template< typename Device, typename Index, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >, bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(), int WarpSize = 32 > class BiEllpack { Loading @@ -31,12 +31,12 @@ class BiEllpack using DeviceType = Device; using IndexType = std::remove_const_t< Index >; using OffsetsHolder = Containers::Vector< Index, DeviceType, IndexType, IndexAllocator >; static constexpr bool getRowMajorOrder() { return RowMajorOrder; } using ViewType = BiEllpackView< Device, Index, RowMajorOrder >; static constexpr bool getOrganization() { return Organization; } using ViewType = BiEllpackView< Device, Index, Organization >; template< typename Device_, typename Index_ > using ViewTemplate = BiEllpackView< Device_, Index_, RowMajorOrder >; using ConstViewType = BiEllpackView< Device, std::add_const_t< IndexType >, RowMajorOrder >; using SegmentViewType = BiEllpackSegmentView< IndexType, RowMajorOrder >; using ViewTemplate = BiEllpackView< Device_, Index_, Organization >; using ConstViewType = BiEllpackView< Device, std::add_const_t< IndexType >, Organization >; using SegmentViewType = BiEllpackSegmentView< IndexType, Organization >; BiEllpack() = default; Loading Loading @@ -109,8 +109,8 @@ class BiEllpack BiEllpack& operator=( const BiEllpack& source ) = default; template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > BiEllpack& operator=( const BiEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, WarpSize >& source ); template< typename Device_, typename Index_, typename IndexAllocator_, ElementsOrganization Organization_ > BiEllpack& operator=( const BiEllpack< Device_, Index_, IndexAllocator_, Organization_, WarpSize >& source ); void save( File& file ) const; Loading Loading @@ -163,7 +163,7 @@ class BiEllpack return 0; }; template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_, int WarpSize_ > template< typename Device_, typename Index_, typename IndexAllocator_, ElementsOrganization Organization_, int WarpSize_ > friend class BiEllpack; }; Loading Loading
Documentation/Examples/Matrices/CMakeLists.txt +11 −4 Original line number Diff line number Diff line Loading @@ -64,6 +64,11 @@ IF( BUILD_CUDA ) ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forAllRows.out OUTPUT DenseMatrixExample_forAllRows.out ) CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_constructor_cuda DenseMatrixViewExample_constructor.cu ) ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_constructor_cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_constructor.out OUTPUT DenseMatrixViewExample_constructor.out ) ELSE() ADD_EXECUTABLE( DenseMatrixExample_Constructor_init_list DenseMatrixExample_Constructor_init_list.cpp ) ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_Constructor_init_list > Loading Loading @@ -130,11 +135,12 @@ ELSE() ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forAllRows.out OUTPUT DenseMatrixExample_forAllRows.out ) ENDIF() ADD_EXECUTABLE( DenseMatrixViewExample_constructor DenseMatrixViewExample_constructor.cpp ) ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_constructor > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_constructor.out OUTPUT DenseMatrixViewExample_constructor.out ) ENDIF() ADD_CUSTOM_TARGET( RunMatricesExamples ALL DEPENDS DenseMatrixExample_Constructor_init_list.out Loading @@ -150,5 +156,6 @@ ADD_CUSTOM_TARGET( RunMatricesExamples ALL DEPENDS DenseMatrixExample_allRowsReduction.out DenseMatrixExample_forRows.out DenseMatrixExample_forAllRows.out DenseMatrixViewExample_constructor.out )
Documentation/Examples/Matrices/DenseMatrixExample_setElement.cpp +17 −3 Original line number Diff line number Diff line #include <iostream> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Matrices/DenseMatrix.h> #include <TNL/Devices/Host.h> #include <TNL/Pointers/SharedPointer.h> #include <TNL/Pointers/SmartPointersRegister.h> template< typename Device > void setElements() { TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 ); TNL::Pointers::SharedPointer< TNL::Matrices::DenseMatrix< double, Device > > matrix( 5, 5 ); for( int i = 0; i < 5; i++ ) matrix.setElement( i, i, i ); matrix->setElement( i, i, i ); std::cout << "Matrix set from the host:" << std::endl; std::cout << *matrix << std::endl; auto f = [=] __cuda_callable__ ( int i ) mutable { matrix->setElement( i, i, -i ); }; TNL::Pointers::synchronizeSmartPointersOnDevice< Device >(); TNL::Algorithms::ParallelFor< Device >::exec( 0, 5, f ); std::cout << "Matrix set from its native device:" << std::endl; std::cout << *matrix << std::endl; std::cout << matrix << std::endl; } int main( int argc, char* argv[] ) Loading
Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cpp 0 → 100644 +39 −0 Original line number Diff line number Diff line #include <iostream> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Matrices/DenseMatrix.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> template< typename Device > void createMatrixView() { TNL::Containers::Vector< double, Device > values { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 }; TNL::Matrices::DenseMatrixView< double, Device, int, TNL::Containers::Segments::RowMajorOrder > matrix( 5, 5, values.getView() ); /*** * We need a matrix view to pass the matrix to lambda function even on CUDA device. */ /*auto matrixView = matrix.getView(); auto f = [=] __cuda_callable__ ( int rowIdx ) mutable { auto row = matrixView.getRow( rowIdx ); row.setElement( rowIdx, 10* ( rowIdx + 1 ) ); }; TNL::Algorithms::ParallelFor< Device >::exec( 0, matrix.getRows(), f ); std::cout << matrix << std::endl;*/ } int main( int argc, char* argv[] ) { std::cout << "Creating matrix view on host: " << std::endl; createMatrixView< TNL::Devices::Host >(); #ifdef HAVE_CUDA std::cout << "Creating matrix view on CUDA device: " << std::endl; createMatrixView< TNL::Devices::Cuda >(); #endif }
Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line DenseMatrixViewExample_constructor.cpp No newline at end of file
src/TNL/Containers/Segments/BiEllpack.h +9 −9 Original line number Diff line number Diff line Loading @@ -22,7 +22,7 @@ namespace TNL { template< typename Device, typename Index, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >, bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(), int WarpSize = 32 > class BiEllpack { Loading @@ -31,12 +31,12 @@ class BiEllpack using DeviceType = Device; using IndexType = std::remove_const_t< Index >; using OffsetsHolder = Containers::Vector< Index, DeviceType, IndexType, IndexAllocator >; static constexpr bool getRowMajorOrder() { return RowMajorOrder; } using ViewType = BiEllpackView< Device, Index, RowMajorOrder >; static constexpr bool getOrganization() { return Organization; } using ViewType = BiEllpackView< Device, Index, Organization >; template< typename Device_, typename Index_ > using ViewTemplate = BiEllpackView< Device_, Index_, RowMajorOrder >; using ConstViewType = BiEllpackView< Device, std::add_const_t< IndexType >, RowMajorOrder >; using SegmentViewType = BiEllpackSegmentView< IndexType, RowMajorOrder >; using ViewTemplate = BiEllpackView< Device_, Index_, Organization >; using ConstViewType = BiEllpackView< Device, std::add_const_t< IndexType >, Organization >; using SegmentViewType = BiEllpackSegmentView< IndexType, Organization >; BiEllpack() = default; Loading Loading @@ -109,8 +109,8 @@ class BiEllpack BiEllpack& operator=( const BiEllpack& source ) = default; template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > BiEllpack& operator=( const BiEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, WarpSize >& source ); template< typename Device_, typename Index_, typename IndexAllocator_, ElementsOrganization Organization_ > BiEllpack& operator=( const BiEllpack< Device_, Index_, IndexAllocator_, Organization_, WarpSize >& source ); void save( File& file ) const; Loading Loading @@ -163,7 +163,7 @@ class BiEllpack return 0; }; template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_, int WarpSize_ > template< typename Device_, typename Index_, typename IndexAllocator_, ElementsOrganization Organization_, int WarpSize_ > friend class BiEllpack; }; Loading