...
 
Commits (204)
This diff is collapsed.
......@@ -23,6 +23,7 @@ option(WITH_OPENMP "Build with OpenMP support" ON)
option(WITH_MPI "Build with MPI support" ON)
option(WITH_GMP "Build with GMP support" OFF)
option(WITH_TESTS "Build tests" ON)
option(WITH_MATRIX_TESTS "Build tests for matrices" ON)
option(WITH_PROFILING "Enable code profiling compiler flags" OFF )
option(WITH_COVERAGE "Enable code coverage reports from unit tests" OFF)
option(WITH_EXAMPLES "Compile the 'src/Examples' directory" ON)
......@@ -82,7 +83,7 @@ set( CMAKE_CXX_STANDARD_REQUIRED ON )
set( CMAKE_CXX_EXTENSIONS OFF )
# set default build options
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" )
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unknown-pragmas" )
set( CMAKE_CXX_FLAGS_DEBUG "-g" )
set( CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" )
# pass -rdynamic only in Debug mode
......@@ -142,7 +143,7 @@ if( DEFINED ENV{CI_JOB_NAME} OR ${CMAKE_GENERATOR} STREQUAL "Ninja" )
endif()
# gtest has to be built before we add the MPI flags
if( ${WITH_TESTS} )
if( ${WITH_TESTS} OR ${WITH_MATRIX_TESTS} )
enable_testing()
# build gtest libs
......@@ -358,6 +359,7 @@ message( " WITH_OPENMP = ${WITH_OPENMP}" )
message( " WITH_MPI = ${WITH_MPI}" )
message( " WITH_GMP = ${WITH_GMP}" )
message( " WITH_TESTS = ${WITH_TESTS}" )
message( " WITH_MATRIX_TESTS = ${WITH_MATRIX_TESTS}" )
message( " WITH_PROFILING = ${WITH_PROFILING}" )
message( " WITH_COVERAGE = ${WITH_COVERAGE}" )
message( " WITH_EXAMPLES = ${WITH_EXAMPLES}" )
......
# Example subdirectories.
add_subdirectory( Algorithms )
add_subdirectory( Containers )
add_subdirectory( Pointers )
add_subdirectory( Matrices )

# FileExample's captured stdout is embedded into the documentation snippets.
add_executable( FileExample FileExample.cpp )
# DEPENDS ties the generated snippet to the example binary so the output is
# regenerated whenever the executable is rebuilt; the original rule declared
# no dependency and could silently go stale.
add_custom_command( OUTPUT FileExample.out
                    COMMAND FileExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileExample.out
                    DEPENDS FileExample )
......
# Per-matrix-type documentation example subdirectories.
add_subdirectory( DenseMatrix )
add_subdirectory( LambdaMatrix )
add_subdirectory( MultidiagonalMatrix )
add_subdirectory( SparseMatrix )
add_subdirectory( TridiagonalMatrix )
This diff is collapsed.
#include <iostream>
#include <TNL/Matrices/DenseMatrix.h>
#include <TNL/Devices/Host.h>

/***
 * Build dense matrices from C++ initializer lists on the given device.
 */
template< typename Device >
void initializerListExample()
{
   // A 3x6 matrix given row by row; every row has the same length.
   TNL::Matrices::DenseMatrix< double, Device > fullMatrix {
      {  1,  2,  3,  4,  5,  6 },
      {  7,  8,  9, 10, 11, 12 },
      { 13, 14, 15, 16, 17, 18 }
   };
   std::cout << "General dense matrix: " << std::endl;
   std::cout << fullMatrix << std::endl;

   // Rows may have different lengths (lower-triangular pattern here);
   // presumably the remaining columns stay zero — confirm with the
   // DenseMatrix initializer-list constructor documentation.
   TNL::Matrices::DenseMatrix< double, Device > lowerTriangular {
      {  1 },
      {  2,  3 },
      {  4,  5,  6 },
      {  7,  8,  9, 10 },
      { 11, 12, 13, 14, 15 }
   };
   std::cout << "Triangular dense matrix: " << std::endl;
   std::cout << lowerTriangular << std::endl;
}

int main( int argc, char* argv[] )
{
   std::cout << "Creating matrices on CPU ... " << std::endl;
   initializerListExample< TNL::Devices::Host >();
#ifdef HAVE_CUDA
   std::cout << "Creating matrices on CUDA GPU ... " << std::endl;
   initializerListExample< TNL::Devices::Cuda >();
#endif
}
DenseMatrixExample_Constructor_init_list.cpp
\ No newline at end of file
#include <iostream>
#include <TNL/Matrices/DenseMatrix.h>
#include <TNL/Devices/Host.h>

/***
 * Demonstrate DenseMatrix::addElement on the given device.
 */
template< typename Device >
void addElements()
{
   constexpr int size = 5;
   TNL::Matrices::DenseMatrix< double, Device > matrix( size, size );

   // Put the row index on the diagonal.
   for( int diag = 0; diag < size; diag++ )
      matrix.setElement( diag, diag, diag );
   std::cout << "Initial matrix is: " << std::endl << matrix << std::endl;

   // addElement( row, col, 1.0, 5.0 ) — presumably combines the stored
   // element scaled by 5.0 with the new value 1.0; confirm against the
   // DenseMatrix::addElement documentation.
   for( int row = 0; row < size; row++ )
      for( int col = 0; col < size; col++ )
         matrix.addElement( row, col, 1.0, 5.0 );
   std::cout << "Matrix after addition is: " << std::endl << matrix << std::endl;
}

int main( int argc, char* argv[] )
{
   std::cout << "Add elements on host:" << std::endl;
   addElements< TNL::Devices::Host >();
#ifdef HAVE_CUDA
   std::cout << "Add elements on CUDA device:" << std::endl;
   addElements< TNL::Devices::Cuda >();
#endif
}
DenseMatrixExample_addElement.cpp
\ No newline at end of file
#include <iostream>
#include <iomanip>
#include <functional>
#include <limits>   // std::numeric_limits is used below but <limits> was never included
#include <TNL/Matrices/DenseMatrix.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>

/***
 * Demonstrate DenseMatrix::allRowsReduction: compute the maximum of the
 * absolute values of the elements in each matrix row.
 */
template< typename Device >
void allRowsReduction()
{
   TNL::Matrices::DenseMatrix< double, Device > matrix {
      { 1, 0, 0, 0, 0 },
      { 1, 2, 0, 0, 0 },
      { 0, 1, 8, 0, 0 },
      { 0, 0, 1, 9, 0 },
      { 0, 0, 0, 0, 1 } };

   /***
    * Find largest element in each row.
    */
   TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() );

   /***
    * Prepare vector view for the lambdas — the view can be captured by
    * value and written to from device code, unlike the vector itself.
    */
   auto rowMaxView = rowMax.getView();

   /***
    * Fetch lambda just returns absolute value of matrix elements.
    */
   auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
      return TNL::abs( value );
   };

   /***
    * Reduce lambda returns the maximum of the given values.
    */
   auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double {
      return TNL::max( a, b );
   };

   /***
    * Keep lambda stores the largest value of each row into rowMax.
    * 'mutable' is needed because writing through the by-value captured
    * view modifies the capture.
    */
   auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable {
      rowMaxView[ rowIdx ] = value;
   };

   /***
    * Compute the largest values in each row; lowest() serves as the
    * identity element of the max-reduction.
    */
   matrix.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
   std::cout << "Max. elements in rows are: " << rowMax << std::endl;
}

int main( int argc, char* argv[] )
{
   std::cout << "All rows reduction on host:" << std::endl;
   allRowsReduction< TNL::Devices::Host >();
#ifdef HAVE_CUDA
   std::cout << "All rows reduction on CUDA device:" << std::endl;
   allRowsReduction< TNL::Devices::Cuda >();
#endif
}
DenseMatrixExample_allRowsReduction.cpp
\ No newline at end of file
#include <iostream>
#include <TNL/Matrices/DenseMatrix.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
// Fill the lower triangle of a 5x5 dense matrix with rowIdx + columnIdx
// using DenseMatrix::forAllRows, then print the result.
template< typename Device >
void forAllRowsExample()
{
TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 );
// Element-visiting lambda. The two column-index parameters presumably
// distinguish a local (in-row) and a global column index — TODO confirm
// against the DenseMatrix::forAllRows documentation. Setting 'compute'
// to false presumably stops further processing of the current row.
auto f = [=] __cuda_callable__ ( int rowIdx, int columnIdx, int columnIdx_, double& value, bool& compute ) {
if( rowIdx < columnIdx )
compute = false;
else
value = rowIdx + columnIdx;
};
matrix.forAllRows( f );
std::cout << matrix << std::endl;
}
// Run the example on the host and, when CUDA is available, on the GPU.
int main( int argc, char* argv[] )
{
std::cout << "Creating matrix on host: " << std::endl;
forAllRowsExample< TNL::Devices::Host >();
#ifdef HAVE_CUDA
std::cout << "Creating matrix on CUDA device: " << std::endl;
forAllRowsExample< TNL::Devices::Cuda >();
#endif
}
DenseMatrixExample_forAllRows.cpp
\ No newline at end of file
#include <iostream>
#include <TNL/Matrices/DenseMatrix.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
// Like the forAllRows example, but uses DenseMatrix::forRows with an
// explicit row range [0, getRows()) — here that range covers all rows.
template< typename Device >
void forRowsExample()
{
TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 );
// Element-visiting lambda; writes rowIdx + columnIdx below and on the
// diagonal. The duplicate column-index parameters presumably denote
// local vs. global column index — TODO confirm with the forRows docs.
auto f = [=] __cuda_callable__ ( int rowIdx, int columnIdx, int columnIdx_, double& value, bool& compute ) {
if( rowIdx < columnIdx )
compute = false;
else
value = rowIdx + columnIdx;
};
matrix.forRows( 0, matrix.getRows(), f );
std::cout << matrix << std::endl;
}
// Run the example on the host and, when CUDA is available, on the GPU.
int main( int argc, char* argv[] )
{
std::cout << "Creating matrix on host: " << std::endl;
forRowsExample< TNL::Devices::Host >();
#ifdef HAVE_CUDA
std::cout << "Creating matrix on CUDA device: " << std::endl;
forRowsExample< TNL::Devices::Cuda >();
#endif
}
DenseMatrixExample_forRows.cpp
\ No newline at end of file
#include <iostream>
#include <TNL/Matrices/DenseMatrix.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>

/***
 * Show DenseMatrix::getCompressedRowLengths, which fills a vector with a
 * per-row element count.
 */
template< typename Device >
void getCompressedRowLengthsExample()
{
   TNL::Matrices::DenseMatrix< double, Device > lowerTriangular {
      {  1 },
      {  2,  3 },
      {  4,  5,  6 },
      {  7,  8,  9, 10 },
      { 11, 12, 13, 14, 15 }
   };
   std::cout << lowerTriangular << std::endl;

   // The matrix resizes the vector and fills in the row lengths.
   TNL::Containers::Vector< int, Device > lengths;
   lowerTriangular.getCompressedRowLengths( lengths );
   std::cout << "Compressed row lengths are: " << lengths << std::endl;
}

int main( int argc, char* argv[] )
{
   std::cout << "Getting compressed row lengths on host: " << std::endl;
   getCompressedRowLengthsExample< TNL::Devices::Host >();
#ifdef HAVE_CUDA
   std::cout << "Getting compressed row lengths on CUDA device: " << std::endl;
   getCompressedRowLengthsExample< TNL::Devices::Cuda >();
#endif
}
DenseMatrixExample_getCompressedRowLengths.cpp
\ No newline at end of file
#include <iostream>
#include <functional>
#include <TNL/Algorithms/ParallelFor.h>
#include <TNL/Matrices/DenseMatrix.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Pointers/SharedPointer.h>

/***
 * Compute the trace of a dense matrix by fetching each row's diagonal
 * element through the row accessor and summing with a device reduction.
 */
template< typename Device >
void getRowExample()
{
   using MatrixType = TNL::Matrices::DenseMatrix< double, Device >;
   TNL::Pointers::SharedPointer< MatrixType > matrix {
      { 1, 0, 0, 0, 0 },
      { 1, 2, 0, 0, 0 },
      { 1, 2, 3, 0, 0 },
      { 1, 2, 3, 4, 0 },
      { 1, 2, 3, 4, 5 }
   };

   /***
    * Fetch lambda function returns the diagonal element of each row.
    */
   auto fetch = [=] __cuda_callable__ ( int rowIdx ) mutable -> double {
      auto row = matrix->getRow( rowIdx );
      return row.getElement( rowIdx );
   };

   /***
    * For the case when Device is CUDA device we need to synchronize smart
    * pointers. To avoid this you may use DenseMatrixView. See
    * DenseMatrixView::getConstRow example for details.
    */
   TNL::Pointers::synchronizeSmartPointersOnDevice< Device >();

   /***
    * Compute the matrix trace. The accumulator must be a double: the fetch
    * lambda yields doubles and the original 'int trace' silently truncated
    * the reduction result.
    */
   double trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix->getRows(), std::plus<>{}, fetch, 0.0 );
   std::cout << "Matrix trace is " << trace << "." << std::endl;
}

int main( int argc, char* argv[] )
{
   std::cout << "Getting matrix rows on host: " << std::endl;
   getRowExample< TNL::Devices::Host >();
#ifdef HAVE_CUDA
   std::cout << "Getting matrix rows on CUDA device: " << std::endl;
   getRowExample< TNL::Devices::Cuda >();
#endif
}
DenseMatrixExample_getConstRow.cpp
\ No newline at end of file
#include <iostream>
#include <iomanip>
#include <TNL/Matrices/DenseMatrix.h>
#include <TNL/Devices/Host.h>

/***
 * Read matrix elements one by one with DenseMatrix::getElement and print
 * them as a fixed-width table.
 */
template< typename Device >
void getElements()
{
   TNL::Matrices::DenseMatrix< double, Device > matrix {
      {  1,  0,  0,  0,  0 },
      { -1,  2, -1,  0,  0 },
      {  0, -1,  2, -1,  0 },
      {  0,  0, -1,  2, -1 },
      {  0,  0,  0,  0,  1 } };

   const int rows = 5;
   const int columns = 5;
   for( int row = 0; row < rows; row++ ) {
      for( int column = 0; column < columns; column++ )
         std::cout << std::setw( 5 ) << matrix.getElement( row, column );
      std::cout << std::endl;
   }
}

int main( int argc, char* argv[] )
{
   std::cout << "Get elements on host:" << std::endl;
   getElements< TNL::Devices::Host >();
#ifdef HAVE_CUDA
   std::cout << "Get elements on CUDA device:" << std::endl;
   getElements< TNL::Devices::Cuda >();
#endif
}
DenseMatrixExample_getElement.cpp
\ No newline at end of file
#include <iostream>
#include <TNL/Matrices/DenseMatrix.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>

/***
 * Compare the allocated element count with the non-zero element count for
 * a matrix whose initializer only fills the lower triangle.
 */
template< typename Device >
void getElementsCountExample()
{
   TNL::Matrices::DenseMatrix< double, Device > lowerTriangular {
      {  1 },
      {  2,  3 },
      {  4,  5,  6 },
      {  7,  8,  9, 10 },
      { 11, 12, 13, 14, 15 }
   };
   const auto allocated = lowerTriangular.getAllocatedElementsCount();
   const auto nonzeros = lowerTriangular.getNonzeroElementsCount();
   std::cout << "Matrix elements count is " << allocated << "." << std::endl;
   std::cout << "Non-zero matrix elements count is " << nonzeros << "." << std::endl;
}

int main( int argc, char* argv[] )
{
   std::cout << "Computing matrix elements on host: " << std::endl;
   getElementsCountExample< TNL::Devices::Host >();
#ifdef HAVE_CUDA
   std::cout << "Computing matrix elements on CUDA device: " << std::endl;
   getElementsCountExample< TNL::Devices::Cuda >();
#endif
}
DenseMatrixExample_getElementsCount.cpp
\ No newline at end of file
DenseMatrixExample_getNonzeroElementsCount.cpp
\ No newline at end of file
#include <iostream>
#include <TNL/Algorithms/ParallelFor.h>
#include <TNL/Matrices/DenseMatrix.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Pointers/SharedPointer.h>

/***
 * Set the matrix diagonal in parallel via DenseMatrix::getRow.
 */
template< typename Device >
void getRowExample()
{
   using MatrixType = TNL::Matrices::DenseMatrix< double, Device >;
   TNL::Pointers::SharedPointer< MatrixType > matrix( 5, 5 );

   // Runs once per row; 'mutable' — presumably needed so the captured
   // shared pointer can be dereferenced in device code — TODO confirm.
   auto f = [=] __cuda_callable__ ( int rowIdx ) mutable {
      auto row = matrix->getRow( rowIdx );
      row.setElement( rowIdx, 10 * ( rowIdx + 1 ) );
   };

   /***
    * For the case when Device is CUDA device we need to synchronize smart
    * pointers. To avoid this you may use DenseMatrixView. See
    * DenseMatrixView::getRow example for details.
    */
   TNL::Pointers::synchronizeSmartPointersOnDevice< Device >();

   /***
    * Set the matrix elements.
    */
   TNL::Algorithms::ParallelFor< Device >::exec( 0, matrix->getRows(), f );

   // Fix: stream the matrix, not the smart pointer — the original printed
   // 'matrix' (the SharedPointer) instead of '*matrix', unlike the
   // sibling setElement example.
   std::cout << *matrix << std::endl;
}

int main( int argc, char* argv[] )
{
   std::cout << "Getting matrix rows on host: " << std::endl;
   getRowExample< TNL::Devices::Host >();
#ifdef HAVE_CUDA
   std::cout << "Getting matrix rows on CUDA device: " << std::endl;
   getRowExample< TNL::Devices::Cuda >();
#endif
}
DenseMatrixExample_getRow.cpp
\ No newline at end of file
#include <iostream>
#include <iomanip>
#include <functional>
#include <limits>   // std::numeric_limits is used below but <limits> was never included
#include <TNL/Matrices/DenseMatrix.h>
#include <TNL/Devices/Host.h>

/***
 * Demonstrate DenseMatrix::rowsReduction over an explicit row range
 * [0, getRows()): compute the maximum absolute value in each row.
 */
template< typename Device >
void rowsReduction()
{
   TNL::Matrices::DenseMatrix< double, Device > matrix {
      { 1, 0, 0, 0, 0 },
      { 1, 2, 0, 0, 0 },
      { 0, 1, 8, 0, 0 },
      { 0, 0, 1, 9, 0 },
      { 0, 0, 0, 0, 1 } };

   /***
    * Find largest element in each row.
    */
   TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() );

   /***
    * Prepare vector view for the lambdas — the view can be captured by
    * value and written to from device code.
    */
   auto rowMaxView = rowMax.getView();

   /***
    * Fetch lambda just returns absolute value of matrix elements.
    */
   auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
      return TNL::abs( value );
   };

   /***
    * Reduce lambda returns the maximum of the given values.
    */
   auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double {
      return TNL::max( a, b );
   };

   /***
    * Keep lambda stores the largest value of each row into rowMax;
    * 'mutable' is needed to write through the by-value captured view.
    */
   auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable {
      rowMaxView[ rowIdx ] = value;
   };

   /***
    * Compute the largest values in each row; lowest() is the identity
    * element of the max-reduction.
    */
   matrix.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
   std::cout << "Max. elements in rows are: " << rowMax << std::endl;
}

int main( int argc, char* argv[] )
{
   std::cout << "Rows reduction on host:" << std::endl;
   rowsReduction< TNL::Devices::Host >();
#ifdef HAVE_CUDA
   std::cout << "Rows reduction on CUDA device:" << std::endl;
   rowsReduction< TNL::Devices::Cuda >();
#endif
}
DenseMatrixExample_rowsReduction.cpp
\ No newline at end of file
#include <iostream>
#include <TNL/Algorithms/ParallelFor.h>
#include <TNL/Matrices/DenseMatrix.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Pointers/SharedPointer.h>
#include <TNL/Pointers/SmartPointersRegister.h>
// Set matrix elements first from host code, then from the matrix's native
// device via a lambda executed with ParallelFor. The SharedPointer makes
// the same matrix accessible on both sides.
template< typename Device >
void setElements()
{
TNL::Pointers::SharedPointer< TNL::Matrices::DenseMatrix< double, Device > > matrix( 5, 5 );
// Host-side writes: put i on the diagonal.
for( int i = 0; i < 5; i++ )
matrix->setElement( i, i, i );
std::cout << "Matrix set from the host:" << std::endl;
std::cout << *matrix << std::endl;
// Device-side writes: negate the diagonal. 'mutable' — presumably needed
// so the captured shared pointer can be used in device code; confirm
// with the SharedPointer documentation.
auto f = [=] __cuda_callable__ ( int i ) mutable {
matrix->setElement( i, i, -i );
};
/***
 * For the case when Device is CUDA device we need to synchronize smart
 * pointers. To avoid this you may use DenseMatrixView. See
 * DenseMatrixView::getRow example for details.
 */
TNL::Pointers::synchronizeSmartPointersOnDevice< Device >();
TNL::Algorithms::ParallelFor< Device >::exec( 0, 5, f );
std::cout << "Matrix set from its native device:" << std::endl;
std::cout << *matrix << std::endl;
}
// Run the example on the host and, when CUDA is available, on the GPU.
int main( int argc, char* argv[] )
{
std::cout << "Set elements on host:" << std::endl;
setElements< TNL::Devices::Host >();
#ifdef HAVE_CUDA
std::cout << "Set elements on CUDA device:" << std::endl;
setElements< TNL::Devices::Cuda >();
#endif
}
DenseMatrixExample_setElement.cpp
\ No newline at end of file
#include <iostream>
#include <TNL/Matrices/DenseMatrix.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>

/***
 * Fill existing dense matrices with DenseMatrix::setElements, which takes
 * an initializer list of rows.
 */
template< typename Device >
void setElementsExample()
{
   // A full 3x6 matrix: every row lists all six columns.
   TNL::Matrices::DenseMatrix< double, Device > fullMatrix;
   fullMatrix.setElements( {
      {  1,  2,  3,  4,  5,  6 },
      {  7,  8,  9, 10, 11, 12 },
      { 13, 14, 15, 16, 17, 18 }
   } );
   std::cout << fullMatrix << std::endl;

   // Rows of different lengths produce a lower-triangular fill pattern.
   TNL::Matrices::DenseMatrix< double, Device > lowerTriangular;
   lowerTriangular.setElements( {
      {  1 },
      {  2,  3 },
      {  4,  5,  6 },
      {  7,  8,  9, 10 },
      { 11, 12, 13, 14, 15 }
   } );
   std::cout << lowerTriangular << std::endl;
}

int main( int argc, char* argv[] )
{
   std::cout << "Setting matrix elements on host: " << std::endl;
   setElementsExample< TNL::Devices::Host >();
#ifdef HAVE_CUDA
   std::cout << "Setting matrix elements on CUDA device: " << std::endl;
   setElementsExample< TNL::Devices::Cuda >();
#endif
}
\ No newline at end of file
DenseMatrixExample_setElements.cpp
\ No newline at end of file
#include <iostream>
#include <TNL/Matrices/DenseMatrix.h>
#include <TNL/Devices/Host.h>

/***
 * Same as the DenseMatrix addElement example, but all writes go through a
 * DenseMatrixView obtained from the matrix.
 */
template< typename Device >
void addElements()
{
   constexpr int size = 5;
   TNL::Matrices::DenseMatrix< double, Device > matrix( size, size );
   auto view = matrix.getView();

   // Writes through the view are visible in the owning matrix.
   for( int diag = 0; diag < size; diag++ )
      view.setElement( diag, diag, diag );
   std::cout << "Initial matrix is: " << std::endl << matrix << std::endl;

   // addElement( row, col, 1.0, 5.0 ) — presumably combines the stored
   // element scaled by 5.0 with the new value 1.0; confirm against the
   // addElement documentation.
   for( int row = 0; row < size; row++ )
      for( int col = 0; col < size; col++ )
         view.addElement( row, col, 1.0, 5.0 );
   std::cout << "Matrix after addition is: " << std::endl << matrix << std::endl;
}

int main( int argc, char* argv[] )
{
   std::cout << "Add elements on host:" << std::endl;
   addElements< TNL::Devices::Host >();
#ifdef HAVE_CUDA
   std::cout << "Add elements on CUDA device:" << std::endl;
   addElements< TNL::Devices::Cuda >();
#endif
}
DenseMatrixViewExample_addElement.cpp
\ No newline at end of file
#include <iostream>
#include <iomanip>
#include <functional>
#include <TNL/Matrices/DenseMatrix.h>
#include <TNL/Devices/Host.h>
template< typename Device >
void allRowsReduction()
{
TNL::Matrices::DenseMatrix< double, Device > matrix {
{ 1, 0, 0, 0, 0 },
{ 1, 2, 0, 0, 0 },
{ 0, 1, 8, 0, 0 },
{ 0, 0, 1, 9, 0 },
{ 0, 0, 0, 0, 1 } };
auto matrixView = matrix.getView();
/***
* Find largest element in each row.
*/
TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() );
/***
* Prepare vector view and matrix view for lambdas.
*/
auto rowMaxView = rowMax.getView();
/***
* Fetch lambda just returns absolute value of matrix elements.
*/
auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
return TNL::abs( value );
};
/***
* Reduce lambda return maximum of given values.
*/
auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double {
return TNL::m