Loading Documentation/Tutorials/Matrices/CMakeLists.txt +29 −8 Original line number Diff line number Diff line Loading @@ -94,15 +94,36 @@ IF( BUILD_CUDA ) ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_setElement.out OUTPUT SparseMatrixViewExample_setElement.out ) CUDA_ADD_EXECUTABLE( MatrixSetup_Benchmark_cuda MatrixSetup_Benchmark.cu ) ADD_CUSTOM_COMMAND( COMMAND MatrixSetup_Benchmark_cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MatrixSetup_Benchmark.out OUTPUT MatrixSetup_Benchmark.out ) CUDA_ADD_EXECUTABLE( DenseMatrixSetup_Benchmark_cuda DenseMatrixSetup_Benchmark.cu ) ADD_CUSTOM_COMMAND( COMMAND DenseMatrixSetup_Benchmark_cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixSetup_Benchmark.out OUTPUT DenseMatrixSetup_Benchmark.out ) CUDA_ADD_EXECUTABLE( SparseMatrixSetup_Benchmark_cuda SparseMatrixSetup_Benchmark.cu ) ADD_CUSTOM_COMMAND( COMMAND SparseMatrixSetup_Benchmark_cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixSetup_Benchmark.out OUTPUT SparseMatrixSetup_Benchmark.out ) CUDA_ADD_EXECUTABLE( MultidiagonalMatrixSetup_Benchmark_cuda MultidiagonalMatrixSetup_Benchmark.cu ) ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixSetup_Benchmark_cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixSetup_Benchmark.out OUTPUT MultidiagonalMatrixSetup_Benchmark.out ) ELSE() ADD_EXECUTABLE( MatrixSetup_Benchmark MatrixSetup_Benchmark_cuda.cpp ) ADD_CUSTOM_COMMAND( COMMAND MatrixSetup_Benchmark > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MatrixSetup_Benchmark.out OUTPUT MatrixSetup_Benchmark.out ) ADD_EXECUTABLE( DenseMatrixSetup_Benchmark DenseMatrixSetup_Benchmark_cuda.cpp ) ADD_CUSTOM_COMMAND( COMMAND DenseMatrixSetup_Benchmark > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixSetup_Benchmark.out OUTPUT DenseMatrixSetup_Benchmark.out ) ADD_EXECUTABLE( SparseMatrixSetup_Benchmark SparseMatrixSetup_Benchmark_cuda.cpp ) ADD_CUSTOM_COMMAND( COMMAND SparseMatrixSetup_Benchmark > 
${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixSetup_Benchmark.out OUTPUT SparseMatrixSetup_Benchmark.out ) ADD_EXECUTABLE( MultidiagonalMatrixSetup_Benchmark MultidiagonalMatrixSetup_Benchmark_cuda.cpp ) ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixSetup_Benchmark > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixSetup_Benchmark.out OUTPUT MultidiagonalMatrixSetup_Benchmark.out ) ENDIF() IF( BUILD_CUDA ) Loading Documentation/Tutorials/Matrices/DenseMatrixSetup_Benchmark.cpp 0 → 100644 +123 −0 Original line number Diff line number Diff line #include <iostream> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Matrices/SparseMatrix.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Timer.h> const int testsCount = 5; template< typename Matrix > void setElement_on_host( const int matrixSize, Matrix& matrix ) { matrix.setDimensions( matrixSize, matrixSize ); for( int j = 0; j < matrixSize; j++ ) for( int i = 0; i < matrixSize; i++ ) matrix.setElement( i, j, i + j ); } template< typename Matrix > void setElement_on_device( const int matrixSize, Matrix& matrix ) { matrix.setDimensions( matrixSize, matrixSize ); auto matrixView = matrix.getView(); auto f = [=] __cuda_callable__ ( int i, int j ) mutable { matrixView.setElement( i, j, i + j ); }; TNL::Algorithms::ParallelFor2D< typename Matrix::DeviceType >::exec( 0, 0, matrixSize, matrixSize, f ); } template< typename Matrix > void getRow( const int matrixSize, Matrix& matrix ) { matrix.setDimensions( matrixSize, matrixSize ); auto matrixView = matrix.getView(); auto f = [=] __cuda_callable__ ( int rowIdx ) mutable { auto row = matrixView.getRow( rowIdx ); for( int i = 0; i < matrixSize; i++ ) row.setElement( i, rowIdx + i ); }; TNL::Algorithms::ParallelFor< typename Matrix::DeviceType >::exec( 0, matrixSize, f ); } template< typename Matrix > void forRows( const int matrixSize, Matrix& matrix ) { matrix.setDimensions( matrixSize, matrixSize ); auto f = [=] 
__cuda_callable__ ( int rowIdx, int localIdx, int& columnIdx, float& value, bool& compute ) mutable { value = rowIdx + columnIdx; }; matrix.forRows( 0, matrixSize, f ); } template< typename Device > void setupDenseMatrix() { std::cout << " Dense matrix test:" << std::endl; for( int matrixSize = 16; matrixSize <= 8192; matrixSize *= 2 ) { std::cout << " Matrix size = " << matrixSize << std::endl; TNL::Timer timer; std::cout << " setElement on host: "; timer.reset(); timer.start(); for( int i = 0; i < testsCount; i++ ) { TNL::Matrices::DenseMatrix< float, Device, int > matrix; setElement_on_host( matrixSize, matrix ); } timer.stop(); std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl; std::cout << " setElement on device: "; timer.reset(); timer.start(); for( int i = 0; i < testsCount; i++ ) { TNL::Matrices::DenseMatrix< float, Device, int > matrix; setElement_on_device( matrixSize, matrix ); } timer.stop(); std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl; std::cout << " getRow: "; timer.reset(); timer.start(); for( int i = 0; i < testsCount; i++ ) { TNL::Matrices::DenseMatrix< float, Device, int > matrix; getRow( matrixSize, matrix ); } timer.stop(); std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl; std::cout << " forRows: "; timer.reset(); timer.start(); for( int i = 0; i < testsCount; i++ ) { TNL::Matrices::DenseMatrix< float, Device, int > matrix; forRows( matrixSize, matrix ); } timer.stop(); std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl; } } int main( int argc, char* argv[] ) { std::cout << "Creating dense matrix on CPU ... " << std::endl; setupDenseMatrix< TNL::Devices::Host >(); #ifdef HAVE_CUDA std::cout << "Creating dense matrix on CUDA GPU ... 
" << std::endl; setupDenseMatrix< TNL::Devices::Cuda >(); #endif } Documentation/Tutorials/Matrices/DenseMatrixSetup_Benchmark.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line DenseMatrixSetup_Benchmark.cpp No newline at end of file Documentation/Tutorials/Matrices/MatrixSetup_Benchmark.cudeleted 120000 → 0 +0 −1 Original line number Diff line number Diff line MatrixSetup_Benchmark.cpp No newline at end of file Documentation/Tutorials/Matrices/MultidiagonalMatrixSetup_Benchmark.cpp 0 → 100644 +221 −0 Original line number Diff line number Diff line #include <iostream> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Matrices/MultidiagonalMatrix.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Timer.h> const int testsCount = 5; template< typename Device > TNL::Containers::Vector< int, Device > getOffsets( const int gridSize ) { TNL::Containers::Vector< int, Device > offsets( 5 ); offsets.setElement( 0, -gridSize ); offsets.setElement( 1, -1 ); offsets.setElement( 2, 0 ); offsets.setElement( 3, 1 ); offsets.setElement( 4, gridSize ); return offsets; } template< typename Matrix > void setElement_on_host( const int gridSize, Matrix& matrix ) { /*** * Set matrix representing approximation of the Laplace operator on regular * grid using the finite difference method by means setElement method called * from the host system. 
*/ const int matrixSize = gridSize * gridSize; matrix.setDimensions( matrixSize, matrixSize, getOffsets< typename Matrix::DeviceType >( gridSize ) ); for( int j = 0; j < gridSize; j++ ) for( int i = 0; i < gridSize; i++ ) { const int rowIdx = j * gridSize + i; if( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 ) matrix.setElement( rowIdx, rowIdx, 1.0 ); else { matrix.setElement( rowIdx, rowIdx - gridSize, 1.0 ); matrix.setElement( rowIdx, rowIdx - 1, 1.0 ); matrix.setElement( rowIdx, rowIdx, -4.0 ); matrix.setElement( rowIdx, rowIdx + 1, 1.0 ); matrix.setElement( rowIdx, rowIdx + gridSize, 1.0 ); } } } template< typename Matrix > void setElement_on_device( const int gridSize, Matrix& matrix ) { /*** * Set matrix representing approximation of the Laplace operator on regular * grid using the finite difference method by means of setElement method called * from the native device. */ const int matrixSize = gridSize * gridSize; matrix.setDimensions( matrixSize, matrixSize, getOffsets< typename Matrix::DeviceType >( gridSize ) ); auto matrixView = matrix.getView(); auto f = [=] __cuda_callable__ ( int i, int j ) mutable { const int rowIdx = j * gridSize + i; if( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 ) matrixView.setElement( rowIdx, rowIdx, 1.0 ); else { matrixView.setElement( rowIdx, rowIdx - gridSize, 1.0 ); matrixView.setElement( rowIdx, rowIdx - 1, 1.0 ); matrixView.setElement( rowIdx, rowIdx, -4.0 ); matrixView.setElement( rowIdx, rowIdx + 1, 1.0 ); matrixView.setElement( rowIdx, rowIdx + gridSize, 1.0 ); } }; TNL::Algorithms::ParallelFor2D< typename Matrix::DeviceType >::exec( 0, 0, gridSize, gridSize, f ); } template< typename Matrix > void getRow( const int gridSize, Matrix& matrix ) { /*** * Set matrix representing approximation of the Laplace operator on regular * grid using the finite difference method by means of getRow method. 
*/ const int matrixSize = gridSize * gridSize; matrix.setDimensions( matrixSize, matrixSize, getOffsets< typename Matrix::DeviceType >( gridSize ) ); auto matrixView = matrix.getView(); auto f = [=] __cuda_callable__ ( int rowIdx ) mutable { const int i = rowIdx % gridSize; const int j = rowIdx / gridSize; auto row = matrixView.getRow( rowIdx ); if( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 ) row.setElement( 2, 1.0 ); else { row.setElement( 0, 1.0 ); row.setElement( 1, 1.0 ); row.setElement( 2, -4.0 ); row.setElement( 3, 1.0 ); row.setElement( 4, 1.0 ); } }; TNL::Algorithms::ParallelFor< typename Matrix::DeviceType >::exec( 0, matrixSize, f ); } template< typename Matrix > void forRows( const int gridSize, Matrix& matrix ) { /*** * Set matrix representing approximation of the Laplace operator on regular * grid using the finite difference method by means of forRows method. */ const int matrixSize = gridSize * gridSize; matrix.setDimensions( matrixSize, matrixSize, getOffsets< typename Matrix::DeviceType >( gridSize ) ); auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, float& value, bool& compute ) mutable { const int i = rowIdx % gridSize; const int j = rowIdx / gridSize; if( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 && localIdx == 0 ) { columnIdx = rowIdx; value = 1.0; } else { switch( localIdx ) { case 0: columnIdx = rowIdx - gridSize; value = 1.0; break; case 1: columnIdx = rowIdx - 1; value = 1.0; break; case 2: columnIdx = rowIdx; value = -4.0; break; case 3: columnIdx = rowIdx + 1; value = 1.0; break; case 4: columnIdx = rowIdx + gridSize; value = 1.0; break; } } }; matrix.forRows( 0, matrixSize, f ); } template< typename Device > void laplaceOperatorMultidiagonalMatrix() { std::cout << " Sparse matrix test:" << std::endl; for( int gridSize = 16; gridSize <= 8192; gridSize *= 2 ) { std::cout << " Grid size = " << gridSize << std::endl; TNL::Timer timer; std::cout << " setElement on host: "; 
timer.reset(); timer.start(); for( int i = 0; i < testsCount; i++ ) { TNL::Matrices::MultidiagonalMatrix< float, Device, int > matrix; setElement_on_host( gridSize, matrix ); } timer.stop(); std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl; std::cout << " setElement on device: "; timer.reset(); timer.start(); for( int i = 0; i < testsCount; i++ ) { TNL::Matrices::MultidiagonalMatrix< float, Device, int > matrix; setElement_on_device( gridSize, matrix ); } timer.stop(); std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl; std::cout << " getRow: "; timer.reset(); timer.start(); for( int i = 0; i < testsCount; i++ ) { TNL::Matrices::MultidiagonalMatrix< float, Device, int > matrix; getRow( gridSize, matrix ); } timer.stop(); std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl; std::cout << " forRows: "; timer.reset(); timer.start(); for( int i = 0; i < testsCount; i++ ) { TNL::Matrices::MultidiagonalMatrix< float, Device, int > matrix; forRows( gridSize, matrix ); } timer.stop(); std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl; } } int main( int argc, char* argv[] ) { std::cout << "Creating Laplace operator matrix on CPU ... " << std::endl; laplaceOperatorMultidiagonalMatrix< TNL::Devices::Host >(); #ifdef HAVE_CUDA std::cout << "Creating Laplace operator matrix on CUDA GPU ... " << std::endl; laplaceOperatorMultidiagonalMatrix< TNL::Devices::Cuda >(); #endif } Loading
Documentation/Tutorials/Matrices/CMakeLists.txt +29 −8 Original line number Diff line number Diff line Loading
@@ -94,15 +94,36 @@ IF( BUILD_CUDA )
                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_setElement.out
                       OUTPUT SparseMatrixViewExample_setElement.out )

   # Matrix setup benchmarks, CUDA build: each target is compiled from the .cu
   # source and its standard output is redirected into the snippets directory.
   CUDA_ADD_EXECUTABLE( MatrixSetup_Benchmark_cuda MatrixSetup_Benchmark.cu )
   ADD_CUSTOM_COMMAND( COMMAND MatrixSetup_Benchmark_cuda >
                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MatrixSetup_Benchmark.out
                       OUTPUT MatrixSetup_Benchmark.out )

   CUDA_ADD_EXECUTABLE( DenseMatrixSetup_Benchmark_cuda DenseMatrixSetup_Benchmark.cu )
   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixSetup_Benchmark_cuda >
                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixSetup_Benchmark.out
                       OUTPUT DenseMatrixSetup_Benchmark.out )

   CUDA_ADD_EXECUTABLE( SparseMatrixSetup_Benchmark_cuda SparseMatrixSetup_Benchmark.cu )
   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixSetup_Benchmark_cuda >
                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixSetup_Benchmark.out
                       OUTPUT SparseMatrixSetup_Benchmark.out )

   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixSetup_Benchmark_cuda MultidiagonalMatrixSetup_Benchmark.cu )
   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixSetup_Benchmark_cuda >
                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixSetup_Benchmark.out
                       OUTPUT MultidiagonalMatrixSetup_Benchmark.out )
ELSE()
   # Host-only build: compile the plain .cpp sources. The benchmark sources are
   # named <Name>_Benchmark.cpp (the .cu entries are symbolic links to them), so
   # the source must not carry a "_cuda" suffix here.
   # NOTE(review): SparseMatrixSetup_Benchmark.cpp is assumed to follow the same
   # naming as the Dense/Multidiagonal sources - confirm it exists.
   ADD_EXECUTABLE( MatrixSetup_Benchmark MatrixSetup_Benchmark.cpp )
   ADD_CUSTOM_COMMAND( COMMAND MatrixSetup_Benchmark >
                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MatrixSetup_Benchmark.out
                       OUTPUT MatrixSetup_Benchmark.out )

   ADD_EXECUTABLE( DenseMatrixSetup_Benchmark DenseMatrixSetup_Benchmark.cpp )
   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixSetup_Benchmark >
                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixSetup_Benchmark.out
                       OUTPUT DenseMatrixSetup_Benchmark.out )

   ADD_EXECUTABLE( SparseMatrixSetup_Benchmark SparseMatrixSetup_Benchmark.cpp )
   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixSetup_Benchmark >
                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixSetup_Benchmark.out
                       OUTPUT SparseMatrixSetup_Benchmark.out )

   ADD_EXECUTABLE( MultidiagonalMatrixSetup_Benchmark MultidiagonalMatrixSetup_Benchmark.cpp )
   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixSetup_Benchmark >
                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixSetup_Benchmark.out
                       OUTPUT MultidiagonalMatrixSetup_Benchmark.out )
ENDIF()

IF( BUILD_CUDA )
Loading
Documentation/Tutorials/Matrices/DenseMatrixSetup_Benchmark.cpp 0 → 100644 +123 −0 Original line number Diff line number Diff line #include <iostream> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Matrices/SparseMatrix.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Timer.h> const int testsCount = 5; template< typename Matrix > void setElement_on_host( const int matrixSize, Matrix& matrix ) { matrix.setDimensions( matrixSize, matrixSize ); for( int j = 0; j < matrixSize; j++ ) for( int i = 0; i < matrixSize; i++ ) matrix.setElement( i, j, i + j ); } template< typename Matrix > void setElement_on_device( const int matrixSize, Matrix& matrix ) { matrix.setDimensions( matrixSize, matrixSize ); auto matrixView = matrix.getView(); auto f = [=] __cuda_callable__ ( int i, int j ) mutable { matrixView.setElement( i, j, i + j ); }; TNL::Algorithms::ParallelFor2D< typename Matrix::DeviceType >::exec( 0, 0, matrixSize, matrixSize, f ); } template< typename Matrix > void getRow( const int matrixSize, Matrix& matrix ) { matrix.setDimensions( matrixSize, matrixSize ); auto matrixView = matrix.getView(); auto f = [=] __cuda_callable__ ( int rowIdx ) mutable { auto row = matrixView.getRow( rowIdx ); for( int i = 0; i < matrixSize; i++ ) row.setElement( i, rowIdx + i ); }; TNL::Algorithms::ParallelFor< typename Matrix::DeviceType >::exec( 0, matrixSize, f ); } template< typename Matrix > void forRows( const int matrixSize, Matrix& matrix ) { matrix.setDimensions( matrixSize, matrixSize ); auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int& columnIdx, float& value, bool& compute ) mutable { value = rowIdx + columnIdx; }; matrix.forRows( 0, matrixSize, f ); } template< typename Device > void setupDenseMatrix() { std::cout << " Dense matrix test:" << std::endl; for( int matrixSize = 16; matrixSize <= 8192; matrixSize *= 2 ) { std::cout << " Matrix size = " << matrixSize << std::endl; TNL::Timer timer; std::cout << " setElement on host: 
"; timer.reset(); timer.start(); for( int i = 0; i < testsCount; i++ ) { TNL::Matrices::DenseMatrix< float, Device, int > matrix; setElement_on_host( matrixSize, matrix ); } timer.stop(); std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl; std::cout << " setElement on device: "; timer.reset(); timer.start(); for( int i = 0; i < testsCount; i++ ) { TNL::Matrices::DenseMatrix< float, Device, int > matrix; setElement_on_device( matrixSize, matrix ); } timer.stop(); std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl; std::cout << " getRow: "; timer.reset(); timer.start(); for( int i = 0; i < testsCount; i++ ) { TNL::Matrices::DenseMatrix< float, Device, int > matrix; getRow( matrixSize, matrix ); } timer.stop(); std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl; std::cout << " forRows: "; timer.reset(); timer.start(); for( int i = 0; i < testsCount; i++ ) { TNL::Matrices::DenseMatrix< float, Device, int > matrix; forRows( matrixSize, matrix ); } timer.stop(); std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl; } } int main( int argc, char* argv[] ) { std::cout << "Creating dense matrix on CPU ... " << std::endl; setupDenseMatrix< TNL::Devices::Host >(); #ifdef HAVE_CUDA std::cout << "Creating dense matrix on CUDA GPU ... " << std::endl; setupDenseMatrix< TNL::Devices::Cuda >(); #endif }
Documentation/Tutorials/Matrices/DenseMatrixSetup_Benchmark.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line DenseMatrixSetup_Benchmark.cpp No newline at end of file
Documentation/Tutorials/Matrices/MatrixSetup_Benchmark.cu deleted 120000 → 0 +0 −1 Original line number Diff line number Diff line MatrixSetup_Benchmark.cpp No newline at end of file
Documentation/Tutorials/Matrices/MultidiagonalMatrixSetup_Benchmark.cpp 0 → 100644 +221 −0 Original line number Diff line number Diff line #include <iostream> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Matrices/MultidiagonalMatrix.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Timer.h> const int testsCount = 5; template< typename Device > TNL::Containers::Vector< int, Device > getOffsets( const int gridSize ) { TNL::Containers::Vector< int, Device > offsets( 5 ); offsets.setElement( 0, -gridSize ); offsets.setElement( 1, -1 ); offsets.setElement( 2, 0 ); offsets.setElement( 3, 1 ); offsets.setElement( 4, gridSize ); return offsets; } template< typename Matrix > void setElement_on_host( const int gridSize, Matrix& matrix ) { /*** * Set matrix representing approximation of the Laplace operator on regular * grid using the finite difference method by means setElement method called * from the host system. */ const int matrixSize = gridSize * gridSize; matrix.setDimensions( matrixSize, matrixSize, getOffsets< typename Matrix::DeviceType >( gridSize ) ); for( int j = 0; j < gridSize; j++ ) for( int i = 0; i < gridSize; i++ ) { const int rowIdx = j * gridSize + i; if( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 ) matrix.setElement( rowIdx, rowIdx, 1.0 ); else { matrix.setElement( rowIdx, rowIdx - gridSize, 1.0 ); matrix.setElement( rowIdx, rowIdx - 1, 1.0 ); matrix.setElement( rowIdx, rowIdx, -4.0 ); matrix.setElement( rowIdx, rowIdx + 1, 1.0 ); matrix.setElement( rowIdx, rowIdx + gridSize, 1.0 ); } } } template< typename Matrix > void setElement_on_device( const int gridSize, Matrix& matrix ) { /*** * Set matrix representing approximation of the Laplace operator on regular * grid using the finite difference method by means of setElement method called * from the native device. 
*/ const int matrixSize = gridSize * gridSize; matrix.setDimensions( matrixSize, matrixSize, getOffsets< typename Matrix::DeviceType >( gridSize ) ); auto matrixView = matrix.getView(); auto f = [=] __cuda_callable__ ( int i, int j ) mutable { const int rowIdx = j * gridSize + i; if( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 ) matrixView.setElement( rowIdx, rowIdx, 1.0 ); else { matrixView.setElement( rowIdx, rowIdx - gridSize, 1.0 ); matrixView.setElement( rowIdx, rowIdx - 1, 1.0 ); matrixView.setElement( rowIdx, rowIdx, -4.0 ); matrixView.setElement( rowIdx, rowIdx + 1, 1.0 ); matrixView.setElement( rowIdx, rowIdx + gridSize, 1.0 ); } }; TNL::Algorithms::ParallelFor2D< typename Matrix::DeviceType >::exec( 0, 0, gridSize, gridSize, f ); } template< typename Matrix > void getRow( const int gridSize, Matrix& matrix ) { /*** * Set matrix representing approximation of the Laplace operator on regular * grid using the finite difference method by means of getRow method. */ const int matrixSize = gridSize * gridSize; matrix.setDimensions( matrixSize, matrixSize, getOffsets< typename Matrix::DeviceType >( gridSize ) ); auto matrixView = matrix.getView(); auto f = [=] __cuda_callable__ ( int rowIdx ) mutable { const int i = rowIdx % gridSize; const int j = rowIdx / gridSize; auto row = matrixView.getRow( rowIdx ); if( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 ) row.setElement( 2, 1.0 ); else { row.setElement( 0, 1.0 ); row.setElement( 1, 1.0 ); row.setElement( 2, -4.0 ); row.setElement( 3, 1.0 ); row.setElement( 4, 1.0 ); } }; TNL::Algorithms::ParallelFor< typename Matrix::DeviceType >::exec( 0, matrixSize, f ); } template< typename Matrix > void forRows( const int gridSize, Matrix& matrix ) { /*** * Set matrix representing approximation of the Laplace operator on regular * grid using the finite difference method by means of forRows method. 
*/ const int matrixSize = gridSize * gridSize; matrix.setDimensions( matrixSize, matrixSize, getOffsets< typename Matrix::DeviceType >( gridSize ) ); auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, float& value, bool& compute ) mutable { const int i = rowIdx % gridSize; const int j = rowIdx / gridSize; if( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 && localIdx == 0 ) { columnIdx = rowIdx; value = 1.0; } else { switch( localIdx ) { case 0: columnIdx = rowIdx - gridSize; value = 1.0; break; case 1: columnIdx = rowIdx - 1; value = 1.0; break; case 2: columnIdx = rowIdx; value = -4.0; break; case 3: columnIdx = rowIdx + 1; value = 1.0; break; case 4: columnIdx = rowIdx + gridSize; value = 1.0; break; } } }; matrix.forRows( 0, matrixSize, f ); } template< typename Device > void laplaceOperatorMultidiagonalMatrix() { std::cout << " Sparse matrix test:" << std::endl; for( int gridSize = 16; gridSize <= 8192; gridSize *= 2 ) { std::cout << " Grid size = " << gridSize << std::endl; TNL::Timer timer; std::cout << " setElement on host: "; timer.reset(); timer.start(); for( int i = 0; i < testsCount; i++ ) { TNL::Matrices::MultidiagonalMatrix< float, Device, int > matrix; setElement_on_host( gridSize, matrix ); } timer.stop(); std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl; std::cout << " setElement on device: "; timer.reset(); timer.start(); for( int i = 0; i < testsCount; i++ ) { TNL::Matrices::MultidiagonalMatrix< float, Device, int > matrix; setElement_on_device( gridSize, matrix ); } timer.stop(); std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl; std::cout << " getRow: "; timer.reset(); timer.start(); for( int i = 0; i < testsCount; i++ ) { TNL::Matrices::MultidiagonalMatrix< float, Device, int > matrix; getRow( gridSize, matrix ); } timer.stop(); std::cout << timer.getRealTime() / ( double ) testsCount << " sec." 
<< std::endl; std::cout << " forRows: "; timer.reset(); timer.start(); for( int i = 0; i < testsCount; i++ ) { TNL::Matrices::MultidiagonalMatrix< float, Device, int > matrix; forRows( gridSize, matrix ); } timer.stop(); std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl; } } int main( int argc, char* argv[] ) { std::cout << "Creating Laplace operator matrix on CPU ... " << std::endl; laplaceOperatorMultidiagonalMatrix< TNL::Devices::Host >(); #ifdef HAVE_CUDA std::cout << "Creating Laplace operator matrix on CUDA GPU ... " << std::endl; laplaceOperatorMultidiagonalMatrix< TNL::Devices::Cuda >(); #endif }