Loading Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp +14 −16 Original line number Diff line number Diff line #include <iostream> #include <TNL/Matrices/SparseMatrix.h> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Matrices/DenseMatrix.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> template< typename Device > void forRowsExample() { using MatrixType = TNL::Matrices::SparseMatrix< double, Device >; using RowViewType = typename MatrixType::RowViewType; MatrixType matrix( { 1, 2, 3, 4, 5, }, 5 ); using MatrixType = TNL::Matrices::DenseMatrix< double, Device >; using RowView = typename MatrixType::RowView; MatrixType matrix( 5, 5 ); auto f = [] __cuda_callable__ ( RowViewType& row ) mutable { for( int localIdx = 0; localIdx <= row.getRowIndex(); // This is important, some matrix formats may allocate more matrix elements localIdx++ ) // than we requested. These padding elements are processed here as well. // and so we cannot use row.getSize() { row.setValue( localIdx, row.getRowIndex() - localIdx + 1.0 ); row.setColumnIndex( localIdx, localIdx ); } auto f = [=] __cuda_callable__ ( RowView& row ) mutable { const int& rowIdx = row.getRowIndex(); row.setElement( rowIdx, 10 * ( rowIdx + 1 ) ); }; matrix.forAllRows( f ); /*** * Set the matrix elements. 
*/ matrix.forAllRows( f ); std::cout << matrix << std::endl; } int main( int argc, char* argv[] ) { std::cout << "Creating matrix on host: " << std::endl; std::cout << "Getting matrix rows on host: " << std::endl; forRowsExample< TNL::Devices::Host >(); #ifdef HAVE_CUDA std::cout << "Creating matrix on CUDA device: " << std::endl; std::cout << "Getting matrix rows on CUDA device: " << std::endl; forRowsExample< TNL::Devices::Cuda >(); #endif } Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cpp +14 −16 Original line number Diff line number Diff line #include <iostream> #include <TNL/Matrices/SparseMatrix.h> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Matrices/DenseMatrix.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> template< typename Device > void forRowsExample() { using MatrixType = TNL::Matrices::SparseMatrix< double, Device >; using RowViewType = typename MatrixType::RowViewType; MatrixType matrix( { 1, 2, 3, 4, 5, }, 5 ); using MatrixType = TNL::Matrices::DenseMatrix< double, Device >; using RowView = typename MatrixType::RowView; MatrixType matrix( 5, 5 ); auto view = matrix.getView(); auto f = [] __cuda_callable__ ( RowViewType& row ) mutable { for( int localIdx = 0; localIdx <= row.getRowIndex(); // This is important, some matrix formats may allocate more matrix elements localIdx++ ) // than we requested. These padding elements are processed here as well. // and so we cannot use row.getSize() { row.setValue( localIdx, row.getRowIndex() - localIdx + 1.0 ); row.setColumnIndex( localIdx, localIdx ); } auto f = [=] __cuda_callable__ ( RowView& row ) mutable { const int& rowIdx = row.getRowIndex(); row.setElement( rowIdx, 10 * ( rowIdx + 1 ) ); }; view.forAllRows( f ); /*** * Set the matrix elements. 
*/ view.forAllRows( f ); std::cout << matrix << std::endl; } int main( int argc, char* argv[] ) { std::cout << "Creating matrix on host: " << std::endl; std::cout << "Getting matrix rows on host: " << std::endl; forRowsExample< TNL::Devices::Host >(); #ifdef HAVE_CUDA std::cout << "Creating matrix on CUDA device: " << std::endl; std::cout << "Getting matrix rows on CUDA device: " << std::endl; forRowsExample< TNL::Devices::Cuda >(); #endif } Documentation/Examples/Matrices/LambdaMatrix/CMakeLists.txt +1 −0 Original line number Diff line number Diff line Loading @@ -4,6 +4,7 @@ set( COMMON_EXAMPLES LambdaMatrixExample_getNonzeroElementsCount LambdaMatrixExample_Laplace LambdaMatrixExample_Laplace_2 LambdaMatrixExample_getRow LambdaMatrixExample_reduceRows LambdaMatrixExample_reduceAllRows LambdaMatrixExample_forElements Loading Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cpp +55 −37 Original line number Diff line number Diff line Loading @@ -8,55 +8,73 @@ template< typename Device > void forRowsExample() { /** * Prepare lambda matrix of the following form: * * / 1 0 0 0 0 \ * | -2 1 -2 0 0 | * | 0 -2 1 -2 0 | * | 0 0 -2 1 -2 | * | 0 0 0 -2 1 | * \ 0 0 0 0 1 / /*** * Set matrix representing approximation of the Laplace operator on regular * grid using the finite difference method. 
*/ int size = 5; auto rowLengths = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { if( rowIdx == 0 || rowIdx == size - 1 ) const int gridSize( 4 ); const int matrixSize = gridSize * gridSize; auto rowLengths = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { const int gridRow = rowIdx / gridSize; // coordinates in the numerical grid const int gridColumn = rowIdx % gridSize; if( gridRow == 0 || gridRow == gridSize - 1 || // boundary grid node gridColumn == 0 || gridColumn == gridSize - 1 ) return 1; return 3; return 5; }; auto matrixElements = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value) { if( rowIdx == 0 || rowIdx == size -1 ) const int gridRow = rowIdx / gridSize; // coordinates in the numerical grid const int gridColumn = rowIdx % gridSize; if( gridRow == 0 || gridRow == gridSize - 1 || // boundary grid node gridColumn == 0 || gridColumn == gridSize - 1 ) { columnIdx = rowIdx; value = 1.0; columnIdx = rowIdx; // diagonal element .... value = 1.0; // ... is set to 1 } else else // interior grid node { columnIdx = rowIdx + localIdx - 1; value = ( columnIdx == rowIdx ) ? 
-2.0 : 1.0; switch( localIdx ) // set diagonal element to -4 { // and the others to 1 case 0: columnIdx = rowIdx - gridSize; value = 1; break; case 1: columnIdx = rowIdx - 1; value = 1; break; case 2: columnIdx = rowIdx; value = -4; break; case 3: columnIdx = rowIdx + 1; value = 1; break; case 4: columnIdx = rowIdx + gridSize; value = 1; break; } } }; auto matrix = TNL::Matrices::LambdaMatrixFactory< double, Device, int >::create( matrixSize, matrixSize, matrixElements, rowLengths ); using MatrixType = decltype( matrix ); using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< double, Device, int >::create( matrixElements, rowLengths ) ); MatrixType matrix( size, size, matrixElements, rowLengths ); /** * Use the `forRows` method to copy the matrix elements to a dense matrix. */ TNL::Matrices::DenseMatrix< double, Device, int > denseMatrix( size, size ); TNL::Matrices::DenseMatrix< double, Device > denseMatrix( matrixSize, matrixSize ); denseMatrix.setValue( 0.0 ); auto dense_view = denseMatrix.getView(); auto f = [=] __cuda_callable__ ( const typename MatrixType::RowViewType& row ) mutable { auto f = [=] __cuda_callable__ ( const typename MatrixType::RowView& row ) mutable { auto dense_row = dense_view.getRow( row.getRowIndex() ); for( int localIdx = 0; localIdx < row.getSize(); localIdx++ ) dense_row.setElement( row.getColumnIndex( localIdx ), row.getValue( localIdx ) ); }; matrix.forAllRows( f ); std::cout << "Lambda matrix looks as:" << std::endl << matrix << std::endl; std::cout << "Dense matrix looks as:" << std::endl << denseMatrix << std::endl; std::cout << "Laplace operator lambda matrix: " << std::endl << matrix << std::endl; std::cout << "Laplace operator dense matrix: " << std::endl << denseMatrix << std::endl; } int main( int argc, char* argv[] ) Loading Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getRow.cpp 0 → 100644 +90 −0 Original line number Diff line number Diff line #include <iostream> #include 
<TNL/Matrices/LambdaMatrix.h> #include <TNL/Matrices/DenseMatrix.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> template< typename Device > void getRowExample() { /*** * Set matrix representing approximation of the Laplace operator on regular * grid using the finite difference method. */ const int gridSize( 4 ); const int matrixSize = gridSize * gridSize; auto rowLengths = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { const int gridRow = rowIdx / gridSize; // coordinates in the numerical grid const int gridColumn = rowIdx % gridSize; if( gridRow == 0 || gridRow == gridSize - 1 || // boundary grid node gridColumn == 0 || gridColumn == gridSize - 1 ) return 1; return 5; }; auto matrixElements = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value) { const int gridRow = rowIdx / gridSize; // coordinates in the numerical grid const int gridColumn = rowIdx % gridSize; if( gridRow == 0 || gridRow == gridSize - 1 || // boundary grid node gridColumn == 0 || gridColumn == gridSize - 1 ) { columnIdx = rowIdx; // diagonal element .... value = 1.0; // ... 
is set to 1 } else // interior grid node { switch( localIdx ) // set diagonal element to -4 { // and the others to 1 case 0: columnIdx = rowIdx - gridSize; value = 1; break; case 1: columnIdx = rowIdx - 1; value = 1; break; case 2: columnIdx = rowIdx; value = -4; break; case 3: columnIdx = rowIdx + 1; value = 1; break; case 4: columnIdx = rowIdx + gridSize; value = 1; break; } } }; auto matrix = TNL::Matrices::LambdaMatrixFactory< double, Device, int >::create( matrixSize, matrixSize, matrixElements, rowLengths ); using MatrixType = decltype( matrix ); TNL::Matrices::DenseMatrix< double, Device > denseMatrix( matrixSize, matrixSize ); denseMatrix.setValue( 0.0 ); auto dense_view = denseMatrix.getView(); auto f = [=] __cuda_callable__ ( const int& rowIdx ) mutable { auto row = matrix.getRow( rowIdx ); auto dense_row = dense_view.getRow( rowIdx ); for( int localIdx = 0; localIdx < row.getSize(); localIdx++ ) dense_row.setElement( row.getColumnIndex( localIdx ), row.getValue( localIdx ) ); }; TNL::Algorithms::ParallelFor< Device >::exec( 0, matrixSize, f ); std::cout << "Laplace operator lambda matrix: " << std::endl << matrix << std::endl; std::cout << "Laplace operator dense matrix: " << std::endl << denseMatrix << std::endl; } int main( int argc, char* argv[] ) { std::cout << "Running example on CPU ... " << std::endl; getRowExample< TNL::Devices::Host >(); #ifdef HAVE_CUDA std::cout << "Running example on CUDA GPU ... " << std::endl; getRowExample< TNL::Devices::Cuda >(); #endif } Loading
Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp +14 −16 Original line number Diff line number Diff line #include <iostream> #include <TNL/Matrices/SparseMatrix.h> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Matrices/DenseMatrix.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> template< typename Device > void forRowsExample() { using MatrixType = TNL::Matrices::SparseMatrix< double, Device >; using RowViewType = typename MatrixType::RowViewType; MatrixType matrix( { 1, 2, 3, 4, 5, }, 5 ); using MatrixType = TNL::Matrices::DenseMatrix< double, Device >; using RowView = typename MatrixType::RowView; MatrixType matrix( 5, 5 ); auto f = [] __cuda_callable__ ( RowViewType& row ) mutable { for( int localIdx = 0; localIdx <= row.getRowIndex(); // This is important, some matrix formats may allocate more matrix elements localIdx++ ) // than we requested. These padding elements are processed here as well. // and so we cannot use row.getSize() { row.setValue( localIdx, row.getRowIndex() - localIdx + 1.0 ); row.setColumnIndex( localIdx, localIdx ); } auto f = [=] __cuda_callable__ ( RowView& row ) mutable { const int& rowIdx = row.getRowIndex(); row.setElement( rowIdx, 10 * ( rowIdx + 1 ) ); }; matrix.forAllRows( f ); /*** * Set the matrix elements. */ matrix.forAllRows( f ); std::cout << matrix << std::endl; } int main( int argc, char* argv[] ) { std::cout << "Creating matrix on host: " << std::endl; std::cout << "Getting matrix rows on host: " << std::endl; forRowsExample< TNL::Devices::Host >(); #ifdef HAVE_CUDA std::cout << "Creating matrix on CUDA device: " << std::endl; std::cout << "Getting matrix rows on CUDA device: " << std::endl; forRowsExample< TNL::Devices::Cuda >(); #endif }
Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cpp +14 −16 Original line number Diff line number Diff line #include <iostream> #include <TNL/Matrices/SparseMatrix.h> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Matrices/DenseMatrix.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> template< typename Device > void forRowsExample() { using MatrixType = TNL::Matrices::SparseMatrix< double, Device >; using RowViewType = typename MatrixType::RowViewType; MatrixType matrix( { 1, 2, 3, 4, 5, }, 5 ); using MatrixType = TNL::Matrices::DenseMatrix< double, Device >; using RowView = typename MatrixType::RowView; MatrixType matrix( 5, 5 ); auto view = matrix.getView(); auto f = [] __cuda_callable__ ( RowViewType& row ) mutable { for( int localIdx = 0; localIdx <= row.getRowIndex(); // This is important, some matrix formats may allocate more matrix elements localIdx++ ) // than we requested. These padding elements are processed here as well. // and so we cannot use row.getSize() { row.setValue( localIdx, row.getRowIndex() - localIdx + 1.0 ); row.setColumnIndex( localIdx, localIdx ); } auto f = [=] __cuda_callable__ ( RowView& row ) mutable { const int& rowIdx = row.getRowIndex(); row.setElement( rowIdx, 10 * ( rowIdx + 1 ) ); }; view.forAllRows( f ); /*** * Set the matrix elements. */ view.forAllRows( f ); std::cout << matrix << std::endl; } int main( int argc, char* argv[] ) { std::cout << "Creating matrix on host: " << std::endl; std::cout << "Getting matrix rows on host: " << std::endl; forRowsExample< TNL::Devices::Host >(); #ifdef HAVE_CUDA std::cout << "Creating matrix on CUDA device: " << std::endl; std::cout << "Getting matrix rows on CUDA device: " << std::endl; forRowsExample< TNL::Devices::Cuda >(); #endif }
Documentation/Examples/Matrices/LambdaMatrix/CMakeLists.txt +1 −0 Original line number Diff line number Diff line Loading @@ -4,6 +4,7 @@ set( COMMON_EXAMPLES LambdaMatrixExample_getNonzeroElementsCount LambdaMatrixExample_Laplace LambdaMatrixExample_Laplace_2 LambdaMatrixExample_getRow LambdaMatrixExample_reduceRows LambdaMatrixExample_reduceAllRows LambdaMatrixExample_forElements Loading
Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cpp +55 −37 Original line number Diff line number Diff line Loading @@ -8,55 +8,73 @@ template< typename Device > void forRowsExample() { /** * Prepare lambda matrix of the following form: * * / 1 0 0 0 0 \ * | -2 1 -2 0 0 | * | 0 -2 1 -2 0 | * | 0 0 -2 1 -2 | * | 0 0 0 -2 1 | * \ 0 0 0 0 1 / /*** * Set matrix representing approximation of the Laplace operator on regular * grid using the finite difference method. */ int size = 5; auto rowLengths = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { if( rowIdx == 0 || rowIdx == size - 1 ) const int gridSize( 4 ); const int matrixSize = gridSize * gridSize; auto rowLengths = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { const int gridRow = rowIdx / gridSize; // coordinates in the numerical grid const int gridColumn = rowIdx % gridSize; if( gridRow == 0 || gridRow == gridSize - 1 || // boundary grid node gridColumn == 0 || gridColumn == gridSize - 1 ) return 1; return 3; return 5; }; auto matrixElements = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value) { if( rowIdx == 0 || rowIdx == size -1 ) const int gridRow = rowIdx / gridSize; // coordinates in the numerical grid const int gridColumn = rowIdx % gridSize; if( gridRow == 0 || gridRow == gridSize - 1 || // boundary grid node gridColumn == 0 || gridColumn == gridSize - 1 ) { columnIdx = rowIdx; value = 1.0; columnIdx = rowIdx; // diagonal element .... value = 1.0; // ... is set to 1 } else else // interior grid node { columnIdx = rowIdx + localIdx - 1; value = ( columnIdx == rowIdx ) ? 
-2.0 : 1.0; switch( localIdx ) // set diagonal element to -4 { // and the others to 1 case 0: columnIdx = rowIdx - gridSize; value = 1; break; case 1: columnIdx = rowIdx - 1; value = 1; break; case 2: columnIdx = rowIdx; value = -4; break; case 3: columnIdx = rowIdx + 1; value = 1; break; case 4: columnIdx = rowIdx + gridSize; value = 1; break; } } }; auto matrix = TNL::Matrices::LambdaMatrixFactory< double, Device, int >::create( matrixSize, matrixSize, matrixElements, rowLengths ); using MatrixType = decltype( matrix ); using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< double, Device, int >::create( matrixElements, rowLengths ) ); MatrixType matrix( size, size, matrixElements, rowLengths ); /** * Use the `forRows` method to copy the matrix elements to a dense matrix. */ TNL::Matrices::DenseMatrix< double, Device, int > denseMatrix( size, size ); TNL::Matrices::DenseMatrix< double, Device > denseMatrix( matrixSize, matrixSize ); denseMatrix.setValue( 0.0 ); auto dense_view = denseMatrix.getView(); auto f = [=] __cuda_callable__ ( const typename MatrixType::RowViewType& row ) mutable { auto f = [=] __cuda_callable__ ( const typename MatrixType::RowView& row ) mutable { auto dense_row = dense_view.getRow( row.getRowIndex() ); for( int localIdx = 0; localIdx < row.getSize(); localIdx++ ) dense_row.setElement( row.getColumnIndex( localIdx ), row.getValue( localIdx ) ); }; matrix.forAllRows( f ); std::cout << "Lambda matrix looks as:" << std::endl << matrix << std::endl; std::cout << "Dense matrix looks as:" << std::endl << denseMatrix << std::endl; std::cout << "Laplace operator lambda matrix: " << std::endl << matrix << std::endl; std::cout << "Laplace operator dense matrix: " << std::endl << denseMatrix << std::endl; } int main( int argc, char* argv[] ) Loading
Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getRow.cpp 0 → 100644 +90 −0 Original line number Diff line number Diff line #include <iostream> #include <TNL/Matrices/LambdaMatrix.h> #include <TNL/Matrices/DenseMatrix.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> template< typename Device > void getRowExample() { /*** * Set matrix representing approximation of the Laplace operator on regular * grid using the finite difference method. */ const int gridSize( 4 ); const int matrixSize = gridSize * gridSize; auto rowLengths = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { const int gridRow = rowIdx / gridSize; // coordinates in the numerical grid const int gridColumn = rowIdx % gridSize; if( gridRow == 0 || gridRow == gridSize - 1 || // boundary grid node gridColumn == 0 || gridColumn == gridSize - 1 ) return 1; return 5; }; auto matrixElements = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value) { const int gridRow = rowIdx / gridSize; // coordinates in the numerical grid const int gridColumn = rowIdx % gridSize; if( gridRow == 0 || gridRow == gridSize - 1 || // boundary grid node gridColumn == 0 || gridColumn == gridSize - 1 ) { columnIdx = rowIdx; // diagonal element .... value = 1.0; // ... 
is set to 1 } else // interior grid node { switch( localIdx ) // set diagonal element to 4 { // and the others to -1 case 0: columnIdx = rowIdx - gridSize; value = 1; break; case 1: columnIdx = rowIdx - 1; value = 1; break; case 2: columnIdx = rowIdx; value = -4; break; case 3: columnIdx = rowIdx + 1; value = 1; break; case 4: columnIdx = rowIdx + gridSize; value = 1; break; } } }; auto matrix = TNL::Matrices::LambdaMatrixFactory< double, Device, int >::create( matrixSize, matrixSize, matrixElements, rowLengths ); using MatrixType = decltype( matrix ); TNL::Matrices::DenseMatrix< double, Device > denseMatrix( matrixSize, matrixSize ); denseMatrix.setValue( 0.0 ); auto dense_view = denseMatrix.getView(); auto f = [=] __cuda_callable__ ( const int& rowIdx ) mutable { auto row = matrix.getRow( rowIdx ); auto dense_row = dense_view.getRow( rowIdx ); for( int localIdx = 0; localIdx < row.getSize(); localIdx++ ) dense_row.setElement( row.getColumnIndex( localIdx ), row.getValue( localIdx ) ); }; TNL::Algorithms::ParallelFor< Device >::exec( 0, matrixSize, f ); std::cout << "Laplace operator lambda matrix: " << std::endl << matrix << std::endl; std::cout << "Laplace operator dense matrix: " << std::endl << denseMatrix << std::endl; } int main( int argc, char* argv[] ) { std::cout << "Running example on CPU ... " << std::endl; getRowExample< TNL::Devices::Host >(); #ifdef HAVE_CUDA std::cout << "Running example on CUDA GPU ... " << std::endl; getRowExample< TNL::Devices::Cuda >(); #endif }